datamaestro 1.5.2__tar.gz → 1.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. datamaestro-1.6.1/.flake8 +7 -0
  2. {datamaestro-1.5.2 → datamaestro-1.6.1}/.github/workflows/pytest.yml +1 -1
  3. datamaestro-1.6.1/.github/workflows/python-publish.yml +45 -0
  4. {datamaestro-1.5.2 → datamaestro-1.6.1}/.pre-commit-config.yaml +3 -3
  5. {datamaestro-1.5.2 → datamaestro-1.6.1}/PKG-INFO +1 -1
  6. datamaestro-1.6.1/cliff.toml +65 -0
  7. datamaestro-1.6.1/release-notes.md +5 -0
  8. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/__init__.py +0 -1
  9. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/__main__.py +2 -1
  10. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/context.py +22 -13
  11. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/definitions.py +1 -4
  12. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/version.py +1 -1
  13. datamaestro-1.5.2/.github/workflows/python-publish.yml +0 -29
  14. {datamaestro-1.5.2 → datamaestro-1.6.1}/.coverage +0 -0
  15. {datamaestro-1.5.2 → datamaestro-1.6.1}/.gitignore +0 -0
  16. {datamaestro-1.5.2 → datamaestro-1.6.1}/.python-version +0 -0
  17. {datamaestro-1.5.2 → datamaestro-1.6.1}/.readthedocs.yml +0 -0
  18. {datamaestro-1.5.2 → datamaestro-1.6.1}/CHANGELOG.md +0 -0
  19. {datamaestro-1.5.2 → datamaestro-1.6.1}/LICENSE +0 -0
  20. {datamaestro-1.5.2 → datamaestro-1.6.1}/MANIFEST.in +0 -0
  21. {datamaestro-1.5.2 → datamaestro-1.6.1}/README.md +0 -0
  22. {datamaestro-1.5.2 → datamaestro-1.6.1}/TODO.md +0 -0
  23. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/Makefile +0 -0
  24. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/make.bat +0 -0
  25. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/requirements.txt +0 -0
  26. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/api/data.md +0 -0
  27. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/api/download.rst +0 -0
  28. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/api/index.md +0 -0
  29. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/api/records.rst +0 -0
  30. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/conf.py +0 -0
  31. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/datasets.rst +0 -0
  32. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/developping.md +0 -0
  33. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/index.md +0 -0
  34. {datamaestro-1.5.2 → datamaestro-1.6.1}/docs/source/style.css +0 -0
  35. {datamaestro-1.5.2 → datamaestro-1.6.1}/pyproject.toml +0 -0
  36. {datamaestro-1.5.2 → datamaestro-1.6.1}/pytest.ini +0 -0
  37. {datamaestro-1.5.2 → datamaestro-1.6.1}/requirements-dev.txt +0 -0
  38. {datamaestro-1.5.2 → datamaestro-1.6.1}/requirements.txt +0 -0
  39. {datamaestro-1.5.2 → datamaestro-1.6.1}/schema.yaml +0 -0
  40. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/annotations/__init__.py +0 -0
  41. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/annotations/agreement.py +0 -0
  42. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/commands/__init__.py +0 -0
  43. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/commands/mainstyle.css +0 -0
  44. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/commands/site.py +0 -0
  45. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/data/__init__.py +0 -0
  46. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/data/csv.py +0 -0
  47. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/data/huggingface.py +0 -0
  48. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/data/ml.py +0 -0
  49. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/data/tensor.py +0 -0
  50. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/__init__.py +0 -0
  51. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/archive.py +0 -0
  52. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/custom.py +0 -0
  53. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/huggingface.py +0 -0
  54. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/links.py +0 -0
  55. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/manual.py +0 -0
  56. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/multiple.py +0 -0
  57. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/single.py +0 -0
  58. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/sync.py +0 -0
  59. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/todo.py +0 -0
  60. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/download/wayback.py +0 -0
  61. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/record.py +0 -0
  62. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/registry.py +0 -0
  63. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/search.py +0 -0
  64. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/settings.py +0 -0
  65. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/sphinx.py +0 -0
  66. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/stream/__init__.py +0 -0
  67. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/stream/compress.py +0 -0
  68. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/stream/lines.py +0 -0
  69. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/templates/dataset.py +0 -0
  70. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/test/__init__.py +0 -0
  71. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/test/checks.py +0 -0
  72. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/test/conftest.py +0 -0
  73. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/test/test_annotations.py +0 -0
  74. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/test/test_download_handlers.py +0 -0
  75. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/test/test_record.py +0 -0
  76. {datamaestro-1.5.2 → datamaestro-1.6.1}/src/datamaestro/utils.py +0 -0
  77. {datamaestro-1.5.2 → datamaestro-1.6.1}/tox.ini +0 -0
  78. {datamaestro-1.5.2 → datamaestro-1.6.1}/uv.lock +0 -0
@@ -0,0 +1,7 @@
1
+ [flake8]
2
+ extend-ignore = E203, E266, E501, W503, E704
3
+ # line length is intentionally set to 80 here because black uses Bugbear
4
+ # See https://github.com/psf/black/blob/master/docs/the_black_code_style.md#line-length for more details
5
+ max-line-length = 80
6
+ max-complexity = 18
7
+ select = B,C,E,F,W,T4,B9
@@ -15,7 +15,7 @@ jobs:
15
15
  runs-on: ubuntu-latest
16
16
  strategy:
17
17
  matrix:
18
- python-version: ["3.9", "3.10", "3.11"]
18
+ python-version: ["3.10", "3.11", "3.12"]
19
19
 
20
20
  steps:
21
21
  - uses: actions/checkout@v2
@@ -0,0 +1,45 @@
1
+ # This workflow builds the package with uv and publishes it to PyPI (trusted publishing) when a release is created
2
+ # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3
+
4
+ name: Upload Python Package
5
+
6
+ on:
7
+ release:
8
+ types: [created]
9
+
10
+ jobs:
11
+ deploy:
12
+ runs-on: ubuntu-latest
13
+
14
+ permissions:
15
+ contents: write # Required to update release notes
16
+ id-token: write # Required for PyPI trusted publishing
17
+
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.x"
24
+
25
+ - name: Generate changelog for this release
26
+ uses: orhun/git-cliff-action@v4
27
+ with:
28
+ config: cliff.toml
29
+ args: --latest --strip header
30
+ env:
31
+ OUTPUT: release-notes.md
32
+
33
+ - name: Update release notes
34
+ uses: softprops/action-gh-release@v2
35
+ with:
36
+ body_path: release-notes.md
37
+
38
+ - name: Install dependencies
39
+ run: python -m pip install --upgrade uv
40
+
41
+ - name: Build
42
+ run: uv build
43
+
44
+ - name: Publish package to PyPI
45
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -1,16 +1,16 @@
1
1
  repos:
2
2
  - repo: https://github.com/pre-commit/pre-commit-hooks
3
- rev: v4.4.0
3
+ rev: v6.0.0
4
4
  hooks:
5
5
  - id: check-yaml
6
6
  - id: end-of-file-fixer
7
7
  - id: trailing-whitespace
8
8
  - repo: https://github.com/psf/black
9
- rev: 23.3.0
9
+ rev: 25.12.0
10
10
  hooks:
11
11
  - id: black
12
12
  - repo: https://github.com/pycqa/flake8
13
- rev: 6.0.0
13
+ rev: 7.3.0
14
14
  hooks:
15
15
  - id: flake8
16
16
  additional_dependencies:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: datamaestro
3
- Version: 1.5.2
3
+ Version: 1.6.1
4
4
  Summary: Add your description here
5
5
  Author-email: Benjamin Piwowarski <benjamin@piwowarski.fr>
6
6
  License-File: LICENSE
@@ -0,0 +1,65 @@
1
+ # git-cliff configuration file
2
+ # See https://git-cliff.org/docs/configuration
3
+
4
+ [changelog]
5
+ header = """
6
+ # Changelog\n
7
+ All notable changes to this project will be documented in this file.\n
8
+ """
9
+ body = """
10
+ {% if version %}\
11
+ ## [{{ version | trim_start_matches(pat="v") }}] - {{ timestamp | date(format="%Y-%m-%d") }}
12
+ {% else %}\
13
+ ## [Unreleased]
14
+ {% endif %}\
15
+ {% for group, commits in commits | group_by(attribute="group") %}
16
+ ### {{ group | striptags | trim | upper_first }}
17
+ {% for commit in commits
18
+ | filter(attribute="scope")
19
+ | sort(attribute="scope") %}
20
+ - **{{ commit.scope }}**: {{ commit.message | upper_first }} ([{{ commit.id | truncate(length=7, end="") }}](https://github.com/experimaestro/experimaestro-python/commit/{{ commit.id }}))
21
+ {%- endfor -%}
22
+ {% for commit in commits %}
23
+ {%- if not commit.scope -%}
24
+ - {{ commit.message | upper_first }} ([{{ commit.id | truncate(length=7, end="") }}](https://github.com/experimaestro/experimaestro-python/commit/{{ commit.id }}))
25
+ {% endif -%}
26
+ {%- endfor -%}
27
+ {% endfor %}
28
+ """
29
+ trim = true
30
+ footer = ""
31
+
32
+ [git]
33
+ conventional_commits = true
34
+ filter_unconventional = false
35
+ split_commits = false
36
+ commit_preprocessors = []
37
+ commit_parsers = [
38
+ { message = "^feat", group = "Features" },
39
+ { message = "^fix", group = "Bug Fixes" },
40
+ { message = "^doc", group = "Documentation" },
41
+ { message = "^perf", group = "Performance" },
42
+ { message = "^refactor", group = "Refactor" },
43
+ { message = "^style", group = "Styling" },
44
+ { message = "^test", group = "Testing" },
45
+ { message = "^bump: version", skip = true },
46
+ { message = "^chore\\(release\\): prepare for", skip = true },
47
+ { message = "^chore\\(deps.*\\)", skip = true },
48
+ { message = "^chore\\(pr\\)", skip = true },
49
+ { message = "^chore\\(pull\\)", skip = true },
50
+ { message = "^build\\(deps", skip = true },
51
+ { message = "^chore", group = "Miscellaneous Tasks" },
52
+ { message = "^ci", group = "CI" },
53
+ { message = "^build", group = "Build" },
54
+ { message = "^revert", group = "Revert" },
55
+ { body = ".*security", group = "Security" },
56
+ ]
57
+ protect_breaking_commits = false
58
+ filter_commits = false
59
+ tag_pattern = "v[0-9].*"
60
+ skip_tags = ""
61
+ ignore_tags = ""
62
+ topo_order = false
63
+ sort_commits = "oldest"
64
+ # Skip versions with no commits (after filtering)
65
+ skip_empty = true
@@ -0,0 +1,5 @@
1
+ ## [1.6.1] - 2025-12-24
2
+
3
+ ### CI
4
+ - Upgrade gh plugin versions ([2738e42](https://github.com/experimaestro/experimaestro-python/commit/2738e42760d8531573844ec6cfedfebd4565ed97))
5
+
@@ -7,7 +7,6 @@ from .context import (
7
7
  prepare_dataset,
8
8
  )
9
9
 
10
- from pkg_resources import get_distribution, DistributionNotFound
11
10
  from .definitions import dataset, metadata
12
11
  from .data import Base
13
12
  from .version import __version__
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env python3
2
2
  # flake8: noqa: T201
3
3
 
4
+ from importlib.metadata import entry_points
4
5
  import sys
5
6
  import logging
6
7
  from functools import update_wrapper
@@ -38,7 +39,7 @@ def pass_cfg(f):
38
39
  # Get all the available repositories
39
40
 
40
41
  REPOSITORIES = {}
41
- for entry_point in pkg_resources.iter_entry_points("datamaestro.repositories"):
42
+ for entry_point in entry_points(group="datamaestro.repositories"):
42
43
  REPOSITORIES[entry_point.name] = entry_point
43
44
 
44
45
 
@@ -8,8 +8,7 @@ import inspect
8
8
  import json
9
9
  from abc import ABC, abstractmethod
10
10
  from experimaestro import Config
11
- import pkg_resources
12
- from experimaestro.compat import cached_property
11
+ from functools import cached_property
13
12
  from experimaestro.mkdocs.metaloader import Module
14
13
  from .utils import CachedFile, downloadURL
15
14
  from .settings import UserSettings, Settings
@@ -18,6 +17,22 @@ from typing import TYPE_CHECKING
18
17
  if TYPE_CHECKING:
19
18
  from datamaestro.definitions import AbstractDataset, DatasetWrapper
20
19
 
20
+ from importlib.metadata import (
21
+ entry_points as _entry_points,
22
+ version as _version,
23
+ PackageNotFoundError as _PackageNotFoundError,
24
+ )
25
+
26
+
27
+ def iter_entry_points(group, name=None):
28
+ """Yield entry points for a given group (and optional name) using importlib.metadata."""
29
+ eps = _entry_points()
30
+ selected = eps.select(group=group)
31
+ if name:
32
+ selected = [ep for ep in selected if ep.name == name]
33
+ for ep in selected:
34
+ yield ep
35
+
21
36
 
22
37
  class Compression:
23
38
  @staticmethod
@@ -106,7 +121,7 @@ class Context:
106
121
 
107
122
  def repositories(self) -> Iterable["Repository"]:
108
123
  """Returns an iterator over repositories"""
109
- for entry_point in pkg_resources.iter_entry_points("datamaestro.repositories"):
124
+ for entry_point in iter_entry_points("datamaestro.repositories"):
110
125
  yield entry_point.load().instance()
111
126
 
112
127
  def repository(self, repositoryid):
@@ -114,10 +129,7 @@ class Context:
114
129
  return None
115
130
 
116
131
  entry_points = [
117
- x
118
- for x in pkg_resources.iter_entry_points(
119
- "datamaestro.repositories", repositoryid
120
- )
132
+ x for x in iter_entry_points("datamaestro.repositories", repositoryid)
121
133
  ]
122
134
  if not entry_points:
123
135
  raise Exception("No datasets repository named %s", repositoryid)
@@ -299,8 +311,7 @@ class BaseRepository(ABC):
299
311
  self.basedir = Path(p).parent
300
312
 
301
313
  @abstractmethod
302
- def __iter__(self) -> Iterator["AbstractDataset"]:
303
- ...
314
+ def __iter__(self) -> Iterator["AbstractDataset"]: ...
304
315
 
305
316
  def search(self, name: str):
306
317
  """Search for a dataset in the definitions"""
@@ -353,11 +364,9 @@ class Repository(BaseRepository):
353
364
 
354
365
  @classmethod
355
366
  def version(cls):
356
- from pkg_resources import get_distribution, DistributionNotFound
357
-
358
367
  try:
359
- return get_distribution(cls.__module__).version
360
- except DistributionNotFound:
368
+ return _version(cls.__module__)
369
+ except _PackageNotFoundError:
361
370
  return None
362
371
 
363
372
  def __repr__(self):
@@ -21,8 +21,6 @@ from typing import (
21
21
  _GenericAlias,
22
22
  )
23
23
  from experimaestro import ( # noqa: F401 (re-exports)
24
- argument,
25
- constant,
26
24
  Param,
27
25
  Option,
28
26
  Config,
@@ -183,8 +181,7 @@ class AbstractDataset(AbstractData):
183
181
  self.hooks[hookname].append(hook)
184
182
 
185
183
  @abstractmethod
186
- def _prepare(self) -> "Base":
187
- ...
184
+ def _prepare(self) -> "Base": ...
188
185
 
189
186
  def format(self, encoder: str) -> str:
190
187
  s = self.prepare()
@@ -1,4 +1,4 @@
1
1
  # This file is auto-generated by Hatchling. As such, do not:
2
2
  # - modify
3
3
  # - track in version control e.g. be sure to add to .gitignore
4
- __version__ = VERSION = '1.5.2'
4
+ __version__ = VERSION = '1.6.1'
@@ -1,29 +0,0 @@
1
- # This workflow will upload a Python Package using Twine when a release is created
2
- # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
3
-
4
- name: Upload Python Package
5
-
6
- on:
7
- release:
8
- types: [created]
9
-
10
- jobs:
11
- deploy:
12
- runs-on: ubuntu-latest
13
-
14
- steps:
15
- - uses: actions/checkout@v2
16
- - name: Set up Python
17
- uses: actions/setup-python@v2
18
- with:
19
- python-version: "3.x"
20
- - name: Install dependencies
21
- run: |
22
- python -m pip install --upgrade setuptools twine uv
23
- - name: Build and publish
24
- env:
25
- TWINE_USERNAME: __token__
26
- TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
27
- run: |
28
- uv build
29
- twine upload dist/*
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes