sample_data_factory 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ COPYRIGHT NOTICE
2
+ ----------------------------------------------------------------------------
3
+ Copyright (c) 2023, Bright Edge eServices. All rights reserved.
4
+
5
+ Unauthorized copying, distribution, modification, public display, or public
6
+ performance of this software, or any portion of it, is strictly prohibited. This
7
+ software is proprietary to Bright Edge eServices and is protected by South
8
+ African copyright laws and international treaty provisions.
9
+
10
+ No part of this software may be reproduced or transmitted in any form or by any
11
+ means, electronic or mechanical, including photocopying, recording, or by any
12
+ information storage and retrieval system, without the express written permission
13
+ of Bright Edge eServices.
14
+
15
+ Any use, copying, or distribution of this software not in accordance with this
16
+ notice is expressly prohibited and may result in severe civil and criminal
17
+ penalties.
@@ -0,0 +1,135 @@
1
+ Metadata-Version: 2.4
2
+ Name: sample_data_factory
3
+ Version: 0.7.0
4
+ Summary: Reusable test data factory for CSV and FIDE XML archives.
5
+ License-Expression: MIT
6
+ License-File: LICENSE.txt
7
+ Author: Hendrik du Toit
8
+ Author-email: hendrik@brightedge.co.za
9
+ Maintainer: Hendrik du Toit
10
+ Maintainer-email: hendrikdt@citiqprepaid.co.za
11
+ Requires-Python: >=3.12
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Information Technology
15
+ Classifier: Intended Audience :: System Administrators
16
+ Classifier: License :: OSI Approved :: MIT License
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Classifier: Topic :: System :: Archiving :: Packaging
23
+ Project-URL: Homepage, https://github.com/RealTimeEvents/sample_data_factory
24
+ Project-URL: Issues, https://github.com/RealTimeEvents/sample_data_factory/issues
25
+ Project-URL: Repository, https://github.com/RealTimeEvents/sample_data_factory.git
26
+ Project-URL: changelog, https://github.com/RealTimeEvents/sample_data_factory/releases
27
+ Description-Content-Type: text/markdown
28
+
29
+ # sample_data_factory
30
+
31
+ ______________________________________________________________________
32
+
33
+ ## Short description
34
+
35
+ `sample_data_factory` is a reusable Python helper for generating deterministic ZIP archives used in test flows, including URS CSV exports and FIDE players XML exports.
36
+
37
+ ______________________________________________________________________
38
+
39
+ ## Module Overview
40
+
41
+ ### Key Features
42
+
43
+ - Builds URS publication ZIP archives containing CSV payloads.
44
+ - Builds FIDE players-list ZIP archives containing XML payloads.
45
+ - Supports either local byte output or upload to Google Drive.
46
+ - Can create nested Google Drive sub-folders when uploading files.
47
+ - Includes unit tests for archive generation and constructor validation.
48
+
49
+ ### Project Structure
50
+
51
+ - `src/sdf/`: Core package implementation (`sample_data_factory.py`).
52
+ - `tests/unit/`: Unit tests for archive builders and helper methods.
53
+ - `scripts/`: SQL/bootstrap assets (legacy resources have been removed).
54
+ - `legacy/`: Archived resources excluded from normal test runs.
55
+ - `*.ps1`: Environment and dependency setup scripts.
56
+
57
+ ______________________________________________________________________
58
+
59
+ ## Getting Started
60
+
61
+ ### Prerequisites
62
+
63
+ - Python 3.12+
64
+ - Poetry
65
+
66
+ ### Setup
67
+
68
+ ```powershell
69
+ # 1) Generate .env values from environment variables
70
+ .\SetupDotEnv.ps1
71
+
72
+ # 2) Configure private Poetry sources when required
73
+ .\SetupPrivateRepoAccess.ps1
74
+
75
+ # 3) Optional: configure GitHub CLI access
76
+ .\SetupGitHubAccess.ps1
77
+
78
+ # 4) Install and sync project dependencies
79
+ .\InstallDevEnv.ps1
80
+
81
+ # 5) Run tests
82
+ poetry run pytest
83
+ ```
84
+
85
+ ### Usage Example
86
+
87
+ ```python
88
+ from datetime import date
89
+
90
+ from sdf.sample_data_factory import SampleDataFactory
91
+
92
+ factory = SampleDataFactory(
93
+ data_structure={
94
+ "headers": ["PlayerID", "PlayerName"],
95
+ "rows": [["1", "Player One"], ["2", "Player Two"]],
96
+ },
97
+ drive=None,
98
+ file_prefix="Players",
99
+ out_file_date=date(2026, 1, 1),
100
+ target_folder_id=None,
101
+ sub_folder_name=None,
102
+ )
103
+
104
+ archive_bytes = factory.build_urs_rating_pub_zip()
105
+ ```
106
+
107
+ ### Common Commands
108
+
109
+ ```powershell
110
+ poetry install
111
+ poetry run pytest
112
+ poetry run pytest --cov=src --cov=tests --cov-report=term-missing
113
+ poetry run black src tests
114
+ poetry run isort src tests
115
+ poetry run flake8 src tests
116
+ poetry run pre-commit run --all-files
117
+ ```
118
+
119
+ ______________________________________________________________________
120
+
121
+ ## Automation Scripts
122
+
123
+ - `InstallPy.ps1`: Bootstraps Python/Poetry setup.
124
+ - `InstallDevEnv.ps1`: Installs development dependencies and pre-commit hooks.
125
+ - `SetupDotEnv.ps1`: Generates `.env` from required environment variables.
126
+ - `SetupPrivateRepoAccess.ps1`: Configures private package source credentials.
127
+ - `SetupGitHubAccess.ps1`: Configures GitHub authentication for local automation.
128
+
129
+ ______________________________________________________________________
130
+
131
+ ## Active Workflows
132
+
133
+ - `.github/workflows/py-temp-pr-pub-no_docker-def.yaml`: Pull request validation workflow.
134
+ - `.github/workflows/py-temp-publish-pub-build_release_notify_after_merge-def.yaml`: Post-merge release and publish workflow.
135
+
@@ -0,0 +1,106 @@
1
+ # sample_data_factory
2
+
3
+ ______________________________________________________________________
4
+
5
+ ## Short description
6
+
7
+ `sample_data_factory` is a reusable Python helper for generating deterministic ZIP archives used in test flows, including URS CSV exports and FIDE players XML exports.
8
+
9
+ ______________________________________________________________________
10
+
11
+ ## Module Overview
12
+
13
+ ### Key Features
14
+
15
+ - Builds URS publication ZIP archives containing CSV payloads.
16
+ - Builds FIDE players-list ZIP archives containing XML payloads.
17
+ - Supports either local byte output or upload to Google Drive.
18
+ - Can create nested Google Drive sub-folders when uploading files.
19
+ - Includes unit tests for archive generation and constructor validation.
20
+
21
+ ### Project Structure
22
+
23
+ - `src/sdf/`: Core package implementation (`sample_data_factory.py`).
24
+ - `tests/unit/`: Unit tests for archive builders and helper methods.
25
+ - `scripts/`: SQL/bootstrap assets (legacy resources have been removed).
26
+ - `legacy/`: Archived resources excluded from normal test runs.
27
+ - `*.ps1`: Environment and dependency setup scripts.
28
+
29
+ ______________________________________________________________________
30
+
31
+ ## Getting Started
32
+
33
+ ### Prerequisites
34
+
35
+ - Python 3.12+
36
+ - Poetry
37
+
38
+ ### Setup
39
+
40
+ ```powershell
41
+ # 1) Generate .env values from environment variables
42
+ .\SetupDotEnv.ps1
43
+
44
+ # 2) Configure private Poetry sources when required
45
+ .\SetupPrivateRepoAccess.ps1
46
+
47
+ # 3) Optional: configure GitHub CLI access
48
+ .\SetupGitHubAccess.ps1
49
+
50
+ # 4) Install and sync project dependencies
51
+ .\InstallDevEnv.ps1
52
+
53
+ # 5) Run tests
54
+ poetry run pytest
55
+ ```
56
+
57
+ ### Usage Example
58
+
59
+ ```python
60
+ from datetime import date
61
+
62
+ from sdf.sample_data_factory import SampleDataFactory
63
+
64
+ factory = SampleDataFactory(
65
+ data_structure={
66
+ "headers": ["PlayerID", "PlayerName"],
67
+ "rows": [["1", "Player One"], ["2", "Player Two"]],
68
+ },
69
+ drive=None,
70
+ file_prefix="Players",
71
+ out_file_date=date(2026, 1, 1),
72
+ target_folder_id=None,
73
+ sub_folder_name=None,
74
+ )
75
+
76
+ archive_bytes = factory.build_urs_rating_pub_zip()
77
+ ```
78
+
79
+ ### Common Commands
80
+
81
+ ```powershell
82
+ poetry install
83
+ poetry run pytest
84
+ poetry run pytest --cov=src --cov=tests --cov-report=term-missing
85
+ poetry run black src tests
86
+ poetry run isort src tests
87
+ poetry run flake8 src tests
88
+ poetry run pre-commit run --all-files
89
+ ```
90
+
91
+ ______________________________________________________________________
92
+
93
+ ## Automation Scripts
94
+
95
+ - `InstallPy.ps1`: Bootstraps Python/Poetry setup.
96
+ - `InstallDevEnv.ps1`: Installs development dependencies and pre-commit hooks.
97
+ - `SetupDotEnv.ps1`: Generates `.env` from required environment variables.
98
+ - `SetupPrivateRepoAccess.ps1`: Configures private package source credentials.
99
+ - `SetupGitHubAccess.ps1`: Configures GitHub authentication for local automation.
100
+
101
+ ______________________________________________________________________
102
+
103
+ ## Active Workflows
104
+
105
+ - `.github/workflows/py-temp-pr-pub-no_docker-def.yaml`: Pull request validation workflow.
106
+ - `.github/workflows/py-temp-publish-pub-build_release_notify_after_merge-def.yaml`: Post-merge release and publish workflow.
@@ -0,0 +1,122 @@
1
+ [build-system]
2
+ requires = [
3
+ "poetry-core>=2.0.0,<3.0.0",
4
+ ]
5
+ build-backend = "poetry.core.masonry.api"
6
+
7
+ [project]
8
+ name = "sample_data_factory"
9
+ version = "0.7.0"
10
+ description = "Reusable test data factory for CSV and FIDE XML archives."
11
+ authors = [
12
+ { name = "Hendrik du Toit", email = "hendrik@brightedge.co.za" }
13
+ ]
14
+ classifiers = [
15
+ "Development Status :: 4 - Beta",
16
+ "Intended Audience :: Developers",
17
+ "Intended Audience :: Information Technology",
18
+ "Intended Audience :: System Administrators",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Programming Language :: Python :: 3.13",
24
+ "Topic :: Software Development :: Libraries :: Python Modules",
25
+ "Topic :: System :: Archiving :: Packaging",
26
+ ]
27
+ dependencies = [
28
+ ]
29
+ license = "MIT"
30
+ #license = "Proprietary"
31
+ license-files = ["LICENSE.txt"]  # NOTE(review): LICENSE.txt contains a proprietary all-rights-reserved notice, which conflicts with license = "MIT" above — reconcile before release
32
+ maintainers = [
33
+ { name = "Hendrik du Toit", email = "hendrikdt@citiqprepaid.co.za" },
34
+ { name = "Henru du Toit", email = "henru@brightedge.co.za" },
35
+ { name = "Dirk du Toit", email = "dirk@brightedge.co.za" },
36
+ ]
37
+ readme = { file = "README.md", content-type = "text/markdown" }
38
+ requires-python = ">=3.12"  # NOTE(review): classifiers above still advertise Python 3.10/3.11 — reconcile with this version floor
39
+ packages = [{include = "sdf", from = "src"}]
40
+
41
+ [project.urls]
42
+ # documentation = "https://readthedocs.org"
43
+ Issues = "https://github.com/RealTimeEvents/sample_data_factory/issues"
44
+ changelog = "https://github.com/RealTimeEvents/sample_data_factory/releases"
45
+ Homepage = "https://github.com/RealTimeEvents/sample_data_factory"
46
+ Repository = "https://github.com/RealTimeEvents/sample_data_factory.git"
47
+
48
+ [tool.black]
49
+ line-length = 120
50
+ target-version = [
51
+ "py313",
52
+ ]
53
+ extend-exclude = """
54
+ (
55
+ ^tests/testdata.py
56
+ )
57
+ """
58
+
59
+ [tool.codespell]
60
+ count = ""
61
+ quiet-level = 2
62
+ skip = "working/*,legacy/*"
63
+ ignore-words-list = "space-holder"
64
+ write-changes = ""
65
+
66
+ [tool.coverage.run]
67
+ source = [
68
+ "src",
69
+ "tests"
70
+ ]
71
+ omit = [
72
+ "./legacy/*",
73
+ "./tests/legacy/*"
74
+ ]
75
+
76
+ [tool.isort]
77
+ profile = "black"
78
+
79
+ [tool.poetry]
80
+ packages = [
81
+ { include = "sdf", from = "src" },
82
+ ]
83
+
84
+ [tool.poetry.dependencies]
85
+
86
+ [tool.poetry.group.dev]
87
+ optional = true
88
+
89
+ [tool.poetry.group.dev.dependencies]
90
+ black = ">=25.1.0"
91
+ codecov = ">=2.1.13"
92
+ flake8 = ">=7.1.1"
93
+ isort = "^5.13.2"
94
+ mdformat-gfm = ">=0.4.1"
95
+ mdformat-frontmatter = ">=2.0.8"
96
+ mdformat-footnote = ">=0.1.1"
97
+ pre-commit = ">=4.0.1"
98
+ pygments = "^2.19.1"
99
+ pytest = ">=8.3.4"
100
+ pytest-cov = ">=6.0.0"
101
+ sphinx = ">=8.1.3"
102
+ twine = ">=6.1.0"
103
+
104
+ [tool.pytest.ini_options]
105
+ norecursedirs = ["tests/legacy"]
106
+ addopts = [
107
+ "-vv",
108
+ "--ignore-glob=*/Archive",
109
+ "--ignore=/legacy",
110
+ "--ignore=tests/legacy",
111
+ ]
112
+ filterwarnings = [
113
+ # "ignore::DeprecationWarning",
114
+ ]
115
+ pythonpath = [
116
+ "src",
117
+ "tests",
118
+ ]
119
+ testpaths = "tests"
120
+ markers = [
121
+ "select: Run a selection of tests",
122
+ ]
File without changes
@@ -0,0 +1,152 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ import io
5
+ import tempfile
6
+ import zipfile
7
+ from datetime import date
8
+ from pathlib import Path
9
+ from typing import Any
10
+ from typing import Protocol
11
+ from xml.etree import ElementTree as ET
12
+
13
+ GOOGLE_DRIVE_FOLDER_MIME = "application/vnd.google-apps.folder"
14
+
15
+
16
+ class GoogleDriveInterface(Protocol):
17
+ service: Any
18
+
19
+ def list_children(self, folder_id: str) -> list[dict[str, str]]:
20
+ pass
21
+
22
+ def upload_file(self, local_path: Path, folder_id: str | None) -> str:
23
+ pass
24
+
25
+
26
class SampleDataFactory:
    """Test helper that builds deterministic publication ZIP archives for URS and FIDE test flows.

    Two archive flavours are supported:

    - URS rating publication archives: a ZIP containing one CSV payload built
      from ``data_structure["headers"]`` and ``data_structure["rows"]``.
    - FIDE players-list archives: a ZIP containing one XML payload built from
      ``data_structure["players"]``.

    Archives are byte-for-byte reproducible: ZIP member timestamps are pinned
    to ``out_file_date`` (midnight) instead of the wall clock, so building the
    same input twice yields identical bytes.

    When ``drive`` is supplied, built archives are uploaded to Google Drive
    (optionally inside a freshly created ``sub_folder_name`` chain below
    ``target_folder_id``) and the uploaded file id is returned; otherwise the
    raw archive bytes are returned.
    """

    def __init__(
        self,
        data_structure: dict,
        drive: GoogleDriveInterface | None,
        file_prefix: str,
        out_file_date: date,
        target_folder_id: str | None,
        sub_folder_name: Path | None,
    ):
        """Validate and store the build configuration.

        Args:
            data_structure: Payload description; ``headers``/``rows`` for CSV
                archives, ``players`` for FIDE XML archives.
            drive: Optional Google Drive client used for uploads.
            file_prefix: Prefix used when naming the CSV/ZIP output files.
            out_file_date: Date embedded in output file names and ZIP member
                timestamps (must be 1980 or later, per the ZIP format).
            target_folder_id: Drive folder receiving uploads; required when
                ``drive`` is given, forbidden otherwise.
            sub_folder_name: Optional relative folder path created below
                ``target_folder_id`` before uploading.

        Raises:
            ValueError: If the drive/folder arguments are inconsistent.
            TypeError: If ``sub_folder_name`` is neither a ``Path`` nor ``None``.
        """
        self.data_structure = data_structure
        self.drive = drive
        self.file_date = out_file_date
        self.file_prefix = file_prefix
        self.sub_folder_name = sub_folder_name
        self.target_folder_id = target_folder_id

        # Folder-related options only make sense together with a drive client.
        if self.drive is None and self.sub_folder_name is not None:
            raise ValueError("sub_folder_name requires a drive instance")
        if self.drive is None and self.target_folder_id is not None:
            raise ValueError("target_folder_id requires a drive instance")
        if self.drive is not None and self.target_folder_id is None:
            raise ValueError("target_folder_id is required when drive is provided")
        if self.sub_folder_name is not None and not isinstance(self.sub_folder_name, Path):
            raise TypeError("sub_folder_name must be a Path or None")

    def _build_csv_bytes(self) -> bytes:
        """Render ``headers`` + ``rows`` from ``data_structure`` as UTF-8 CSV bytes."""
        headers = self.data_structure["headers"]
        rows = self.data_structure["rows"]
        # newline="" + explicit lineterminator keeps output identical across platforms.
        buffer = io.StringIO(newline="")
        writer = csv.writer(buffer, lineterminator="\n")
        writer.writerow(headers)
        writer.writerows(rows)
        return buffer.getvalue().encode("utf-8")

    def _build_fide_players_xml_bytes(self) -> bytes:
        """Build the FIDE players archive and return it as ZIP bytes.

        Despite the name (kept for backward compatibility with existing
        callers/tests), this returns a ZIP archive whose single member
        ``players_list_xml_foa.xml`` holds the rendered ``<playerslist>`` XML.
        Player fields are emitted in sorted key order so output does not
        depend on dict insertion order.
        """
        players = self.data_structure["players"]
        root = ET.Element("playerslist")
        xml_filename = "players_list_xml_foa.xml"

        for player_data in players:
            player_element = ET.SubElement(root, "player")
            for key in sorted(player_data.keys()):
                value = player_data[key]
                field = ET.SubElement(player_element, key)
                field.text = value

        xml_bytes = ET.tostring(root, encoding="utf-8")
        return self._zip_bytes(xml_filename, xml_bytes)

    def _zip_bytes(self, member_name: str, payload: bytes) -> bytes:
        """Return a one-member ZIP archive with its timestamp pinned to ``file_date``.

        ``ZipFile.writestr(name, data)`` stamps members with the current local
        time, which would make otherwise-identical builds differ byte-for-byte;
        pinning the timestamp keeps archives reproducible, as advertised.
        """
        member = zipfile.ZipInfo(
            member_name,
            date_time=(self.file_date.year, self.file_date.month, self.file_date.day, 0, 0, 0),
        )
        # ZipInfo defaults to ZIP_STORED; request compression explicitly.
        member.compress_type = zipfile.ZIP_DEFLATED
        output_buffer = io.BytesIO()
        with zipfile.ZipFile(output_buffer, "w", compression=zipfile.ZIP_DEFLATED) as output_zip:
            output_zip.writestr(member, payload)
        return output_buffer.getvalue()

    def _create_sub_folder_if_needed(self, parent_folder_id: str, sub_folder_name: Path) -> str:
        """Walk/create each component of ``sub_folder_name`` below ``parent_folder_id``.

        Existing folders are reused; missing ones are created through the raw
        Drive ``files().create`` API. Returns the id of the deepest folder.
        """
        current_parent_id = parent_folder_id
        folder_parts = [part for part in sub_folder_name.parts if part not in {"", "."}]

        for folder_name in folder_parts:
            subfolder_id = None
            for item in self.drive.list_children(current_parent_id):
                if item.get("name") == folder_name and item.get("mimeType") == GOOGLE_DRIVE_FOLDER_MIME:
                    subfolder_id = item.get("id")
                    break

            if not subfolder_id:
                folder_metadata = {
                    "mimeType": GOOGLE_DRIVE_FOLDER_MIME,
                    "name": folder_name,
                    "parents": [current_parent_id],
                }
                folder = (
                    self.drive.service.files()
                    .create(body=folder_metadata, fields="id", supportsAllDrives=True)
                    .execute()
                )
                subfolder_id = folder.get("id")

            current_parent_id = subfolder_id

        return current_parent_id

    def _csv_name(self) -> str:
        """Return the CSV member name, e.g. ``Players_260101.csv``."""
        return f"{self.file_prefix}_{self._yymmdd()}.csv"

    def _upload_or_return_bytes(self, archive_bytes: bytes, archive_filename: str) -> str | bytes:
        """Return ``archive_bytes`` directly, or upload them and return the Drive file id.

        When uploading, the archive is staged in a temporary directory under
        ``archive_filename`` and, if configured, placed inside the
        ``sub_folder_name`` chain created below ``target_folder_id``.
        """
        if self.drive is None:
            return archive_bytes

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            zip_path = temp_path / archive_filename
            zip_path.write_bytes(archive_bytes)
            upload_folder_id = self.target_folder_id

            if self.sub_folder_name is not None:
                upload_folder_id = self._create_sub_folder_if_needed(upload_folder_id, self.sub_folder_name)

            file_id = self.drive.upload_file(local_path=zip_path, folder_id=upload_folder_id)

        return file_id

    def _yymmdd(self) -> str:
        """Return ``file_date`` formatted as a six-digit YYMMDD string."""
        return self.file_date.strftime("%y%m%d")

    def _zip_name(self) -> str:
        """Return the archive name, e.g. ``Players_260101.zip``."""
        return f"{self.file_prefix}_{self._yymmdd()}.zip"

    def build(self) -> str | bytes:
        """Default build entry point; delegates to :meth:`build_urs_rating_pub_zip`."""
        return self.build_urs_rating_pub_zip()

    def build_fide_players_list_zip(self) -> str | bytes:
        """Build the FIDE players-list archive; return its bytes or the uploaded file id."""
        archive_bytes = self._build_fide_players_xml_bytes()
        archive_filename = "players_list_xml.zip"
        return self._upload_or_return_bytes(archive_bytes=archive_bytes, archive_filename=archive_filename)

    def build_urs_rating_pub_zip(self) -> str | bytes:
        """Build the URS rating publication archive; return its bytes or the uploaded file id."""
        archive_bytes = self._zip_bytes(self._csv_name(), self._build_csv_bytes())
        return self._upload_or_return_bytes(archive_bytes=archive_bytes, archive_filename=self._zip_name())