pyprocessors-metadata_from_categories 1.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,137 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ results.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ .python-version
87
+
88
+ # pipenv
89
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
90
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
91
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
92
+ # install all needed dependencies.
93
+ #Pipfile.lock
94
+
95
+ # celery beat schedule file
96
+ celerybeat-schedule
97
+
98
+ # SageMath parsed files
99
+ *.sage.py
100
+
101
+ # Environments
102
+ .env
103
+ .venv
104
+ env/
105
+ venv/
106
+ ENV/
107
+ env.bak/
108
+ venv.bak/
109
+
110
+ # Spyder project settings
111
+ .spyderproject
112
+ .spyproject
113
+
114
+ # Rope project settings
115
+ .ropeproject
116
+
117
+ # mkdocs documentation
118
+ /site
119
+
120
+ # mypy
121
+ .mypy_cache/
122
+ .dmypy.json
123
+ dmypy.json
124
+
125
+ # Pyre type checker
126
+ .pyre/
127
+
128
+ # Specific
129
+ .idea/
130
+ .groovylintrc.json
131
+ .emailNotif
132
+ uv.lock
133
+
134
+ # SBOMs
135
+ **/sbom*.json
136
+ **/trivy*.html
137
+ **/audit*.json
@@ -0,0 +1,5 @@
1
+ # Authors
2
+
3
+ Contributors to pyprocessors_metadata_from_categories include:
4
+
5
+ + [Olivier Terrier](mailto:olivier.terrier@kairntech.com)
@@ -0,0 +1,7 @@
1
+ # Changelog
2
+ All notable changes to this project will be documented in this file.
3
+
4
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
5
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [unreleased]
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Olivier Terrier
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
@@ -0,0 +1,173 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyprocessors-metadata_from_categories
3
+ Version: 1.6.1
4
+ Summary: Sherpa transform annotations to categories processor
5
+ Project-URL: Homepage, https://github.com/oterrier/pyprocessors_metadata_from_categories/
6
+ Author-email: Olivier Terrier <olivier.terrier@kairntech.com>
7
+ License: The MIT License (MIT)
8
+
9
+ Copyright (c) 2021 Olivier Terrier
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in
19
+ all copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
27
+ THE SOFTWARE.
28
+ License-File: AUTHORS.md
29
+ License-File: LICENSE
30
+ Classifier: Development Status :: 4 - Beta
31
+ Classifier: Intended Audience :: Developers
32
+ Classifier: Intended Audience :: Information Technology
33
+ Classifier: Intended Audience :: System Administrators
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Operating System :: OS Independent
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Topic :: Software Development
38
+ Classifier: Topic :: Software Development :: Libraries
39
+ Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
40
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
41
+ Requires-Python: >=3.12
42
+ Requires-Dist: collections-extended
43
+ Requires-Dist: log-with-context
44
+ Requires-Dist: openpyxl
45
+ Requires-Dist: pandas
46
+ Requires-Dist: pymultirole-plugins<1.7.0,>=1.6.0
47
+ Provides-Extra: dev
48
+ Requires-Dist: bump2version; extra == 'dev'
49
+ Requires-Dist: pre-commit; extra == 'dev'
50
+ Provides-Extra: docs
51
+ Requires-Dist: lxml-html-clean; extra == 'docs'
52
+ Requires-Dist: m2r2; extra == 'docs'
53
+ Requires-Dist: sphinx; extra == 'docs'
54
+ Requires-Dist: sphinx-rtd-theme; extra == 'docs'
55
+ Requires-Dist: sphinxcontrib-apidoc; extra == 'docs'
56
+ Provides-Extra: sbom
57
+ Requires-Dist: cyclonedx-bom; extra == 'sbom'
58
+ Requires-Dist: pip-audit; extra == 'sbom'
59
+ Provides-Extra: test
60
+ Requires-Dist: dirty-equals; extra == 'test'
61
+ Requires-Dist: pip; extra == 'test'
62
+ Requires-Dist: pytest; extra == 'test'
63
+ Requires-Dist: pytest-cov; extra == 'test'
64
+ Requires-Dist: ruff; extra == 'test'
65
+ Description-Content-Type: text/markdown
66
+
67
+ # pyprocessors_metadata_from_categories
68
+
69
+ [![license](https://img.shields.io/github/license/oterrier/pyprocessors_metadata_from_categories)](https://github.com/oterrier/pyprocessors_metadata_from_categories/blob/master/LICENSE)
70
+ [![tests](https://github.com/oterrier/pyprocessors_metadata_from_categories/workflows/tests/badge.svg)](https://github.com/oterrier/pyprocessors_metadata_from_categories/actions?query=workflow%3Atests)
71
+ [![codecov](https://img.shields.io/codecov/c/github/oterrier/pyprocessors_metadata_from_categories)](https://codecov.io/gh/oterrier/pyprocessors_metadata_from_categories)
72
+ [![docs](https://img.shields.io/readthedocs/pyprocessors_metadata_from_categories)](https://pyprocessors_metadata_from_categories.readthedocs.io)
73
+ [![version](https://img.shields.io/pypi/v/pyprocessors_metadata_from_categories)](https://pypi.org/project/pyprocessors_metadata_from_categories/)
74
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pyprocessors_metadata_from_categories)](https://pypi.org/project/pyprocessors_metadata_from_categories/)
75
+
76
+ Turn the categories carried by a document into a metadata value.
77
+
78
+ ## How it works
79
+
80
+ For every input document the processor converts its categories into a metadata
81
+ entry and then clears the categories. Conversion happens **systematically at both
82
+ levels**:
83
+
84
+ - **Document level** — the document's categories are turned into
85
+ `document.metadata[metadata_name]`, then `document.categories` is cleared.
86
+ - **Segment level** — the same conversion is applied to each sentence: its
87
+ categories become `sentence.metadata[metadata_name]`, then the sentence's
88
+ categories are cleared.
89
+
90
+ For a given set of categories, only those whose `score` is greater than or equal
91
+ to `multi_label_threshold` are kept, and the resulting metadata value is:
92
+
93
+ - `None` (the metadata key is removed) when no category passes the threshold,
94
+ - a single value when exactly one category passes,
95
+ - a list of values when several categories pass.
96
+
97
+ ### Parameters
98
+
99
+ | Parameter | Default | Description |
100
+ | ----------------------- | ------- | --------------------------------------------------------------------------- |
101
+ | `metadata_name` | `None` | Name of the generated metadata key. |
102
+ | `multi_label_threshold` | `0.0` | Only categories with a score greater than or equal to this value are kept. |
103
+
104
+ ## Installation
105
+
106
+ You can simply `pip install pyprocessors_metadata_from_categories`.
107
+
108
+ ## Developing
109
+
110
+ ### Prerequisites
111
+
112
+ You will need to install `uv` (for managing the virtual environment and running tests):
113
+
114
+ ```
115
+ pip install uv
116
+ ```
117
+
118
+ Clone the repository:
119
+
120
+ ```
121
+ git clone https://github.com/oterrier/pyprocessors_metadata_from_categories
122
+ ```
123
+
124
+ ### Running the test suite
125
+
126
+ You can run the full test suite with:
127
+
128
+ ```
129
+ uv run pytest
130
+ ```
131
+
132
+ ### Linting
133
+
134
+ ```
135
+ uv run ruff check .
136
+ uv run ruff format --check .
137
+ ```
138
+
139
+ ### Building the documentation
140
+
141
+ You can build the HTML documentation with:
142
+
143
+ ```
144
+ uv run --extra docs sphinx-build docs docs/_build
145
+ ```
146
+
147
+ The built documentation is available at `docs/_build/index.html`.
148
+
149
+ ## SBOM & vulnerability check
150
+
151
+ Install the SBOM dependencies:
152
+
153
+ ```
154
+ uv sync --extra sbom
155
+ ```
156
+
157
+ Generate a CycloneDX SBOM from the current environment:
158
+
159
+ ```
160
+ uv run cyclonedx-py environment -o sbom.cdx.json --output-format json
161
+ ```
162
+
163
+ Audit dependencies for known vulnerabilities:
164
+
165
+ ```
166
+ uv run pip-audit --format json --output audit-report.json
167
+ ```
168
+
169
+ To fail on any known vulnerability (useful in CI):
170
+
171
+ ```
172
+ uv run pip-audit --strict
173
+ ```
@@ -0,0 +1,107 @@
1
+ # pyprocessors_metadata_from_categories
2
+
3
+ [![license](https://img.shields.io/github/license/oterrier/pyprocessors_metadata_from_categories)](https://github.com/oterrier/pyprocessors_metadata_from_categories/blob/master/LICENSE)
4
+ [![tests](https://github.com/oterrier/pyprocessors_metadata_from_categories/workflows/tests/badge.svg)](https://github.com/oterrier/pyprocessors_metadata_from_categories/actions?query=workflow%3Atests)
5
+ [![codecov](https://img.shields.io/codecov/c/github/oterrier/pyprocessors_metadata_from_categories)](https://codecov.io/gh/oterrier/pyprocessors_metadata_from_categories)
6
+ [![docs](https://img.shields.io/readthedocs/pyprocessors_metadata_from_categories)](https://pyprocessors_metadata_from_categories.readthedocs.io)
7
+ [![version](https://img.shields.io/pypi/v/pyprocessors_metadata_from_categories)](https://pypi.org/project/pyprocessors_metadata_from_categories/)
8
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/pyprocessors_metadata_from_categories)](https://pypi.org/project/pyprocessors_metadata_from_categories/)
9
+
10
+ Turn the categories carried by a document into a metadata value.
11
+
12
+ ## How it works
13
+
14
+ For every input document the processor converts its categories into a metadata
15
+ entry and then clears the categories. Conversion happens **systematically at both
16
+ levels**:
17
+
18
+ - **Document level** — the document's categories are turned into
19
+ `document.metadata[metadata_name]`, then `document.categories` is cleared.
20
+ - **Segment level** — the same conversion is applied to each sentence: its
21
+ categories become `sentence.metadata[metadata_name]`, then the sentence's
22
+ categories are cleared.
23
+
24
+ For a given set of categories, only those whose `score` is greater than or equal
25
+ to `multi_label_threshold` are kept, and the resulting metadata value is:
26
+
27
+ - `None` (the metadata key is removed) when no category passes the threshold,
28
+ - a single value when exactly one category passes,
29
+ - a list of values when several categories pass.
30
+
31
+ ### Parameters
32
+
33
+ | Parameter | Default | Description |
34
+ | ----------------------- | ------- | --------------------------------------------------------------------------- |
35
+ | `metadata_name` | `None` | Name of the generated metadata key. |
36
+ | `multi_label_threshold` | `0.0` | Only categories with a score greater than or equal to this value are kept. |
37
+
38
+ ## Installation
39
+
40
+ You can simply `pip install pyprocessors_metadata_from_categories`.
41
+
42
+ ## Developing
43
+
44
+ ### Prerequisites
45
+
46
+ You will need to install `uv` (for managing the virtual environment and running tests):
47
+
48
+ ```
49
+ pip install uv
50
+ ```
51
+
52
+ Clone the repository:
53
+
54
+ ```
55
+ git clone https://github.com/oterrier/pyprocessors_metadata_from_categories
56
+ ```
57
+
58
+ ### Running the test suite
59
+
60
+ You can run the full test suite with:
61
+
62
+ ```
63
+ uv run pytest
64
+ ```
65
+
66
+ ### Linting
67
+
68
+ ```
69
+ uv run ruff check .
70
+ uv run ruff format --check .
71
+ ```
72
+
73
+ ### Building the documentation
74
+
75
+ You can build the HTML documentation with:
76
+
77
+ ```
78
+ uv run --extra docs sphinx-build docs docs/_build
79
+ ```
80
+
81
+ The built documentation is available at `docs/_build/index.html`.
82
+
83
+ ## SBOM & vulnerability check
84
+
85
+ Install the SBOM dependencies:
86
+
87
+ ```
88
+ uv sync --extra sbom
89
+ ```
90
+
91
+ Generate a CycloneDX SBOM from the current environment:
92
+
93
+ ```
94
+ uv run cyclonedx-py environment -o sbom.cdx.json --output-format json
95
+ ```
96
+
97
+ Audit dependencies for known vulnerabilities:
98
+
99
+ ```
100
+ uv run pip-audit --format json --output audit-report.json
101
+ ```
102
+
103
+ To fail on any known vulnerability (useful in CI):
104
+
105
+ ```
106
+ uv run pip-audit --strict
107
+ ```
@@ -0,0 +1,39 @@
1
+ # Release Instructions
2
+
3
+ This document guides a contributor through creating a release of pyprocessors_metadata_from_categories.
4
+
5
+ ## Preflight checks
6
+
7
+ ### Ensure all tests pass
8
+
9
+ Locally you can run `uv run pytest` to check that all tests pass, then check that
10
+ the tests against all supported environments are also passing in the project's
11
+ [GitHub actions](https://github.com/oterrier/pyprocessors_metadata_from_categories/actions?query=branch%3Amaster+workflow%3Atests).
12
+
13
+ #### Verify that `AUTHORS.md` is up-to-date
14
+
15
+ The following command shows the number of commits per author since the last
16
+ annotated tag:
17
+ ```
18
+ t=$(git describe --abbrev=0); echo Commits since $t; git shortlog -s $t..
19
+ ```
20
+
21
+ ## Make the release
22
+
23
+ Run
24
+
25
+ ```
26
+ bumpversion release # bump version from .devX to release version
27
+ git push --tags # push tagged release to upstream
28
+ flit publish # publish to PyPI
29
+ ```
30
+
31
+ ## Start work on the next release
32
+
33
+ Run
34
+
35
+ ```
36
+ bumpversion minor
37
+ ```
38
+
39
+ To start work on the next release
@@ -0,0 +1,100 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "pyprocessors-metadata_from_categories"
7
+ dynamic = ["version"]
8
+ description = "Sherpa transform annotations to categories processor"
9
+ readme = "README.md"
10
+ license = {file = "LICENSE"}
11
+ authors = [
12
+ {name = "Olivier Terrier", email = "olivier.terrier@kairntech.com"},
13
+ ]
14
+ keywords = []
15
+ classifiers = [
16
+ "Intended Audience :: Information Technology",
17
+ "Intended Audience :: Developers",
18
+ "Intended Audience :: System Administrators",
19
+ "Operating System :: OS Independent",
20
+ "Topic :: Software Development :: Libraries :: Application Frameworks",
21
+ "Topic :: Software Development :: Libraries :: Python Modules",
22
+ "Topic :: Software Development :: Libraries",
23
+ "Topic :: Software Development",
24
+ "License :: OSI Approved :: MIT License",
25
+ "Development Status :: 4 - Beta",
26
+ "Programming Language :: Python :: 3.12",
27
+ ]
28
+ requires-python = ">=3.12"
29
+ dependencies = [
30
+ "pymultirole-plugins>=1.6.0,<1.7.0",
31
+ "collections-extended",
32
+ "pandas",
33
+ "openpyxl",
34
+ "log-with-context",
35
+ ]
36
+
37
+ [project.urls]
38
+ Homepage = "https://github.com/oterrier/pyprocessors_metadata_from_categories/"
39
+
40
+ [project.entry-points."pyprocessors.plugins"]
41
+ metadata_from_categories = "pyprocessors_metadata_from_categories.metadata_from_categories:MetadataFromCategoriesProcessor"
42
+
43
+ [project.optional-dependencies]
44
+ test = [
45
+ "pytest",
46
+ "pytest-cov",
47
+ "ruff",
48
+ "pip",
49
+ "dirty-equals",
50
+ ]
51
+ docs = [
52
+ "sphinx",
53
+ "sphinx-rtd-theme",
54
+ "m2r2",
55
+ "sphinxcontrib.apidoc",
56
+ "lxml_html_clean",
57
+ ]
58
+ dev = [
59
+ "pre-commit",
60
+ "bump2version",
61
+ ]
62
+
63
+ sbom = ["cyclonedx-bom", "pip-audit"]
64
+
65
+ [tool.hatch.version]
66
+ path = "src/pyprocessors_metadata_from_categories/__init__.py"
67
+
68
+ [tool.hatch.build.targets.wheel]
69
+ packages = ["src/pyprocessors_metadata_from_categories"]
70
+
71
+ [tool.hatch.build]
72
+ exclude = [
73
+ "/tests",
74
+ "/docs",
75
+ "Jenkinsfile",
76
+ "Dockerfile",
77
+ "bumpversion.py",
78
+ "mypy.ini",
79
+ "hgnc_cache.sqlite",
80
+ "trivy-html-template.tpl",
81
+ "MIGRATION.md",
82
+ ".gitignore",
83
+ ".dockerignore"
84
+ ]
85
+
86
+ [tool.pytest.ini_options]
87
+ addopts = "--durations=5"
88
+ norecursedirs = ["docs"]
89
+
90
+ [tool.ruff]
91
+ line-length = 120
92
+ target-version = "py312"
93
+
94
+ [tool.ruff.lint]
95
+ select = ["E", "W", "F", "I", "B", "C4", "UP", "ARG", "SIM"]
96
+ ignore = ["E501"]
97
+
98
+ [tool.ruff.format]
99
+ quote-style = "double"
100
+ indent-style = "space"
@@ -0,0 +1,3 @@
1
+ """Sherpa transform labels to metadata"""
2
+
3
+ __version__ = "1.6.1"
@@ -0,0 +1,56 @@
1
+ from typing import cast
2
+
3
+ from log_with_context import Logger, add_logging_context
4
+ from pydantic import BaseModel, Field
5
+ from pymultirole_plugins.v1.processor import ProcessorBase, ProcessorParameters
6
+ from pymultirole_plugins.v1.schema import Document
7
+
8
+ logger = Logger("pymultirole")
9
+
10
+
11
class MetadataFromCategoriesParameters(ProcessorParameters):
    # Parameters accepted by MetadataFromCategoriesProcessor.

    # Key under which the converted categories are stored in the metadata
    # mapping. NOTE(review): annotated as `str` but defaults to None, so callers
    # are expected to always provide a name — confirm.
    metadata_name: str = Field(None, description="Name of the generated metadata")
    # Inclusive lower bound on the category score: the filter in
    # create_metadata_from_categories compares with `>=`.
    multi_label_threshold: float = Field(
        0.0,
        description="only categories with a score greater than or equal to threshold are kept",
    )
16
+
17
+
18
class MetadataFromCategoriesProcessor(ProcessorBase):
    """Create metadata from categories"""

    def process(self, documents: list[Document], parameters: ProcessorParameters) -> list[Document]:
        """Move the categories of each document and of each of its sentences into metadata.

        The documents are modified in place: for every document, and then for
        every sentence it carries, the categories are converted to a metadata
        value stored under ``parameters.metadata_name`` and the categories are
        reset to ``None``.

        Args:
            documents: Documents to convert.
            parameters: Expected to be a ``MetadataFromCategoriesParameters``.

        Returns:
            The same ``documents`` list that was passed in.
        """
        params: MetadataFromCategoriesParameters = cast(MetadataFromCategoriesParameters, parameters)
        for document in documents:
            with add_logging_context(docid=document.identifier):
                # Document level
                self._categories_to_metadata(document, params)

                # Segment level
                for sent in document.sentences or []:
                    self._categories_to_metadata(sent, params)
        return documents

    @staticmethod
    def _categories_to_metadata(item, params: MetadataFromCategoriesParameters) -> None:
        """Replace the categories of ``item`` (a document or a sentence) with a metadata entry."""
        mvals = create_metadata_from_categories(item.categories, params.multi_label_threshold)
        if mvals is None:
            # No category passed the threshold: drop any value previously stored
            # under this key so stale metadata does not survive the conversion.
            if item.metadata is not None:
                item.metadata.pop(params.metadata_name, None)
        else:
            # Create the mapping on demand instead of failing when the item
            # carries no metadata yet.
            if item.metadata is None:
                item.metadata = {}
            item.metadata[params.metadata_name] = mvals
        item.categories = None

    @classmethod
    def get_model(cls) -> type[BaseModel]:
        """Return the parameters model describing this processor's options."""
        return MetadataFromCategoriesParameters
48
+
49
+
50
+ def create_metadata_from_categories(categories, threshold=0.0):
51
+ values = []
52
+ if categories:
53
+ for c in categories:
54
+ if c.score and c.score >= threshold:
55
+ values.append(c.label or c.labelName)
56
+ return values if len(values) > 1 else (values[0] if len(values) == 1 else None)