boti 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- boti-0.1.0/LICENSE +21 -0
- boti-0.1.0/PKG-INFO +397 -0
- boti-0.1.0/README.md +368 -0
- boti-0.1.0/pyproject.toml +60 -0
- boti-0.1.0/setup.cfg +4 -0
- boti-0.1.0/src/boti/__init__.py +13 -0
- boti-0.1.0/src/boti/core/__init__.py +20 -0
- boti-0.1.0/src/boti/core/filesystem.py +162 -0
- boti-0.1.0/src/boti/core/logger.py +180 -0
- boti-0.1.0/src/boti/core/logger_filters.py +110 -0
- boti-0.1.0/src/boti/core/logger_handlers.py +41 -0
- boti-0.1.0/src/boti/core/logger_runtime.py +50 -0
- boti-0.1.0/src/boti/core/managed_resource.py +319 -0
- boti-0.1.0/src/boti/core/models.py +89 -0
- boti-0.1.0/src/boti/core/project.py +167 -0
- boti-0.1.0/src/boti/core/secure_io.py +96 -0
- boti-0.1.0/src/boti/core/security.py +91 -0
- boti-0.1.0/src/boti/core/settings.py +124 -0
- boti-0.1.0/src/boti/main.py +21 -0
- boti-0.1.0/src/boti.egg-info/PKG-INFO +397 -0
- boti-0.1.0/src/boti.egg-info/SOURCES.txt +23 -0
- boti-0.1.0/src/boti.egg-info/dependency_links.txt +1 -0
- boti-0.1.0/src/boti.egg-info/entry_points.txt +2 -0
- boti-0.1.0/src/boti.egg-info/requires.txt +7 -0
- boti-0.1.0/src/boti.egg-info/top_level.txt +1 -0
boti-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 boti contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
boti-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: boti
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Core infrastructure for the Boti ecosystem
|
|
5
|
+
Author: lvalverdeb
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/lvalverdeb/boti
|
|
8
|
+
Project-URL: Repository, https://github.com/lvalverdeb/boti
|
|
9
|
+
Project-URL: Documentation, https://github.com/lvalverdeb/boti#readme
|
|
10
|
+
Project-URL: Bug Tracker, https://github.com/lvalverdeb/boti/issues
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
17
|
+
Classifier: Operating System :: OS Independent
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Requires-Python: >=3.13
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
License-File: LICENSE
|
|
22
|
+
Requires-Dist: fsspec>=2026.3.0
|
|
23
|
+
Requires-Dist: pyarrow>=23.0.1
|
|
24
|
+
Requires-Dist: pydantic>=2.12.5
|
|
25
|
+
Requires-Dist: pydantic-settings>=2.13.1
|
|
26
|
+
Provides-Extra: data
|
|
27
|
+
Requires-Dist: boti-data<0.2.0,>=0.1.0; extra == "data"
|
|
28
|
+
Dynamic: license-file
|
|
29
|
+
|
|
30
|
+
# boti
|
|
31
|
+
|
|
32
|
+
`boti` stands for **Base Object Transformation Interface**.
|
|
33
|
+
|
|
34
|
+
It is a Python library for building **reliable, reusable transformation-oriented software**: scripts, services, data pipelines, batch jobs, notebook helpers, and internal tooling that all need the same operational foundations.
|
|
35
|
+
|
|
36
|
+
At its core, `boti` is about giving transformation code a consistent runtime model:
|
|
37
|
+
|
|
38
|
+
- how resources are opened and closed
|
|
39
|
+
- how file access is constrained and validated
|
|
40
|
+
- how projects discover their root and runtime configuration
|
|
41
|
+
- how logs are emitted in a predictable way
|
|
42
|
+
|
|
43
|
+
The repository also contains the companion package **`boti-data`**, which extends that foundation with SQL, parquet, schema, gateway, and distributed data capabilities.
|
|
44
|
+
|
|
45
|
+
## What problem `boti` solves
|
|
46
|
+
|
|
47
|
+
A lot of data and automation code starts small and quickly becomes operationally messy:
|
|
48
|
+
|
|
49
|
+
- ad hoc setup and teardown logic
|
|
50
|
+
- duplicated path and file handling
|
|
51
|
+
- environment loading spread across scripts and notebooks
|
|
52
|
+
- inconsistent logging and diagnostics
|
|
53
|
+
- brittle assumptions about where code is running from
|
|
54
|
+
|
|
55
|
+
That usually leads to code that works in one notebook or on one machine, but becomes fragile when reused in pipelines, packaged services, shared libraries, or scheduled jobs.
|
|
56
|
+
|
|
57
|
+
`boti` gives those projects a small set of **opinionated runtime primitives** so the same code can move more cleanly between local development, automation, and production workflows.
|
|
58
|
+
|
|
59
|
+
## Why `boti` is useful
|
|
60
|
+
|
|
61
|
+
`boti` is useful when you want transformation code to behave like a real software component instead of a collection of one-off scripts.
|
|
62
|
+
|
|
63
|
+
It helps by:
|
|
64
|
+
|
|
65
|
+
- standardising resource lifecycle with `ManagedResource`
|
|
66
|
+
- making constrained file access explicit with `SecureResource`
|
|
67
|
+
- centralising project-root and environment discovery with `ProjectService`
|
|
68
|
+
- giving the codebase a shared logging model with `Logger`
|
|
69
|
+
|
|
70
|
+
This is especially valuable when multiple teams or notebooks interact with the same codebase, because it reduces hidden assumptions and makes behaviour more predictable.
|
|
71
|
+
|
|
72
|
+
## What `boti-data` adds
|
|
73
|
+
|
|
74
|
+
`boti-data` is the data layer for the Boti ecosystem.
|
|
75
|
+
|
|
76
|
+
Where `boti` solves the runtime and application-structure problems, `boti-data` solves the **data access and data movement problems** that appear once teams need to work across databases, parquet files, schemas, and distributed workloads.
|
|
77
|
+
|
|
78
|
+
It provides:
|
|
79
|
+
|
|
80
|
+
- SQL database resources and session management
|
|
81
|
+
- SQLAlchemy model reflection and model registries
|
|
82
|
+
- connection catalogues for named data sources
|
|
83
|
+
- gateway-style loading APIs
|
|
84
|
+
- parquet resources and readers
|
|
85
|
+
- schema normalisation, validation, and field mapping
|
|
86
|
+
- filter expressions and join helpers
|
|
87
|
+
- partitioned and distributed loading workflows
|
|
88
|
+
|
|
89
|
+
In practice, it helps teams replace repetitive, hand-rolled access code with a consistent interface for loading, validating, shaping, and moving data.
|
|
90
|
+
|
|
91
|
+
## Where `boti-data` can make a big difference
|
|
92
|
+
|
|
93
|
+
`boti-data` is useful anywhere teams need to bridge operational systems and analytical workflows without rewriting the same infrastructure over and over.
|
|
94
|
+
|
|
95
|
+
It can be especially impactful in domains such as:
|
|
96
|
+
|
|
97
|
+
- **analytics engineering**: consistent loading from source systems into analysis-ready frames
|
|
98
|
+
- **business intelligence**: reusable connection catalogues, filters, and schema handling across reports
|
|
99
|
+
- **operations and supply chain**: joining transactional data from multiple systems with safer loading patterns
|
|
100
|
+
- **finance and risk**: explicit schemas, reproducible transformations, and controlled access to structured data
|
|
101
|
+
- **customer, product, and growth analytics**: repeatable extraction and normalisation across many upstream tables
|
|
102
|
+
- **ML and feature pipelines**: partitioned loads, parquet workflows, and predictable resource management
|
|
103
|
+
- **research and notebook-heavy teams**: moving from exploratory code to reusable library code without losing speed
|
|
104
|
+
|
|
105
|
+
The value is largest when data work sits in the gap between raw infrastructure and business logic: not just querying tables, but building maintainable, reusable data interfaces.
|
|
106
|
+
|
|
107
|
+
## Packages
|
|
108
|
+
|
|
109
|
+
### Core package
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
pip install boti
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Core imports:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from boti import Logger, ManagedResource, ProjectService, SecureResource
|
|
119
|
+
from boti.core import is_secure_path
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
You can also import from `boti.core` directly:
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from boti.core import Logger, ManagedResource, ProjectService, SecureResource
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Core + data package
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
pip install "boti[data]"
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
or:
|
|
135
|
+
|
|
136
|
+
```bash
|
|
137
|
+
pip install boti-data
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Data imports live under the separate top-level package:
|
|
141
|
+
|
|
142
|
+
```python
|
|
143
|
+
from boti_data import DataGateway, DataHelper, SqlDatabaseConfig, SqlDatabaseResource
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Quick start
|
|
147
|
+
|
|
148
|
+
### Managed resource
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
from boti import ManagedResource
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class MyResource(ManagedResource):
|
|
155
|
+
    def _cleanup(self) -> None:
|
|
156
|
+
        print("cleaning up")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
with MyResource() as resource:
|
|
160
|
+
    print(resource.closed)  # False
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Filesystem configuration
|
|
164
|
+
|
|
165
|
+
`FilesystemConfig` provides a typed way to describe where a resource should read and write data. It uses `fsspec` underneath, so `boti` can work with the local filesystem, S3-compatible object storage, and any other backend supported by your installed `fsspec` drivers.
|
|
166
|
+
|
|
167
|
+
#### Local files
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
from boti.core.filesystem import FilesystemConfig, create_filesystem
|
|
171
|
+
|
|
172
|
+
config = FilesystemConfig(
|
|
173
|
+
fs_type="file",
|
|
174
|
+
fs_path="/srv/boti/data",
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
fs = create_filesystem(config)
|
|
178
|
+
with fs.open("/srv/boti/data/example.txt", "w") as handle:
|
|
179
|
+
    handle.write("hello")
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
#### S3 server connections
|
|
183
|
+
|
|
184
|
+
Use this pattern when connecting to AWS S3 or to an S3-compatible server such as MinIO, Ceph, or another internal object-storage endpoint.
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
from boti.core.filesystem import FilesystemConfig, FilesystemAdapter
|
|
188
|
+
|
|
189
|
+
config = FilesystemConfig(
|
|
190
|
+
fs_type="s3",
|
|
191
|
+
fs_path="analytics-bucket/raw/events",
|
|
192
|
+
fs_key="ACCESS_KEY",
|
|
193
|
+
fs_secret="SECRET_KEY",
|
|
194
|
+
fs_endpoint="https://minio.internal.example",
|
|
195
|
+
fs_region="eu-west-1",
|
|
196
|
+
)
|
|
197
|
+
|
|
198
|
+
adapter = FilesystemAdapter(config)
|
|
199
|
+
fs = adapter.get_filesystem()
|
|
200
|
+
|
|
201
|
+
with fs.open("analytics-bucket/raw/events/2026-04-15.json", "rb") as handle:
|
|
202
|
+
    payload = handle.read()
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
`fs_endpoint` points at the S3 server, while `fs_path` identifies the bucket and prefix you want to work with.
|
|
206
|
+
|
|
207
|
+
#### Other supported filesystems
|
|
208
|
+
|
|
209
|
+
Any backend recognised by the installed `fsspec` stack can be used through `fs_type`. Common examples include:
|
|
210
|
+
|
|
211
|
+
- `memory` for tests and ephemeral workflows
|
|
212
|
+
- `gcs` for Google Cloud Storage
|
|
213
|
+
- `az` or `abfs` for Azure storage
|
|
214
|
+
- `ftp`, `sftp`, or `http` where the relevant driver is installed
|
|
215
|
+
|
|
216
|
+
```python
|
|
217
|
+
from boti.core.filesystem import FilesystemConfig
|
|
218
|
+
|
|
219
|
+
memory_config = FilesystemConfig(fs_type="memory", fs_path="scratch")
|
|
220
|
+
gcs_config = FilesystemConfig(fs_type="gcs", fs_path="my-bucket/datasets")
|
|
221
|
+
azure_config = FilesystemConfig(fs_type="az", fs_path="container/path")
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Project service
|
|
225
|
+
|
|
226
|
+
```python
|
|
227
|
+
from boti import ProjectService
|
|
228
|
+
|
|
229
|
+
project_root = ProjectService.detect_project_root()
|
|
230
|
+
env_file = ProjectService.setup_environment(project_root)
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Secure file access
|
|
234
|
+
|
|
235
|
+
`SecureResource` wraps file operations in a sandbox. By default it allows paths under the detected project root and the system temporary directory, and you can add extra allowlisted paths explicitly.
|
|
236
|
+
|
|
237
|
+
```python
|
|
238
|
+
from pathlib import Path
|
|
239
|
+
|
|
240
|
+
from boti import SecureResource
|
|
241
|
+
from boti.core.models import ResourceConfig
|
|
242
|
+
|
|
243
|
+
config = ResourceConfig(project_root=Path.cwd())
|
|
244
|
+
|
|
245
|
+
with SecureResource(config=config) as resource:
|
|
246
|
+
    contents = resource.read_text_secure("README.md")
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
#### Allow an additional trusted directory
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
from pathlib import Path
|
|
253
|
+
|
|
254
|
+
from boti import SecureResource
|
|
255
|
+
from boti.core.models import ResourceConfig
|
|
256
|
+
|
|
257
|
+
config = ResourceConfig(
|
|
258
|
+
project_root=Path("/workspace/project"),
|
|
259
|
+
extra_allowed_paths=[Path("/srv/shared/reference-data")],
|
|
260
|
+
)
|
|
261
|
+
|
|
262
|
+
with SecureResource(config=config) as resource:
|
|
263
|
+
    reference = resource.read_text_secure("/srv/shared/reference-data/lookup.csv")
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
#### Block unsafe paths
|
|
267
|
+
|
|
268
|
+
```python
|
|
269
|
+
from pathlib import Path
|
|
270
|
+
|
|
271
|
+
from boti import SecureResource
|
|
272
|
+
from boti.core.models import ResourceConfig
|
|
273
|
+
|
|
274
|
+
config = ResourceConfig(project_root=Path("/workspace/project"))
|
|
275
|
+
|
|
276
|
+
with SecureResource(config=config) as resource:
|
|
277
|
+
    try:
|
|
278
|
+
        resource.read_text_secure("/etc/passwd")
|
|
279
|
+
    except PermissionError:
|
|
280
|
+
        print("outside the configured sandbox roots")
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Logger
|
|
284
|
+
|
|
285
|
+
`Logger` provides a thread-safe, non-blocking logging layer with secure file handling and sensitive-data redaction.
|
|
286
|
+
|
|
287
|
+
#### Quick logger
|
|
288
|
+
|
|
289
|
+
```python
|
|
290
|
+
from pathlib import Path
|
|
291
|
+
|
|
292
|
+
from boti import Logger
|
|
293
|
+
|
|
294
|
+
logger = Logger.default_logger(
|
|
295
|
+
logger_name="daily_job",
|
|
296
|
+
log_file="daily_job",
|
|
297
|
+
base_dir=Path("/workspace/project"),
|
|
298
|
+
)
|
|
299
|
+
|
|
300
|
+
logger.info("starting extraction")
|
|
301
|
+
logger.warning("retrying after transient error")
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
#### Explicit logger configuration
|
|
305
|
+
|
|
306
|
+
```python
|
|
307
|
+
from pathlib import Path
|
|
308
|
+
|
|
309
|
+
from boti.core.logger import Logger
|
|
310
|
+
from boti.core.models import LoggerConfig
|
|
311
|
+
|
|
312
|
+
config = LoggerConfig(
|
|
313
|
+
log_dir=Path("/workspace/project/logs"),
|
|
314
|
+
logger_name="etl.pipeline",
|
|
315
|
+
log_file="etl_pipeline",
|
|
316
|
+
verbose=True,
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
logger = Logger(config)
|
|
320
|
+
logger.set_level(Logger.INFO)
|
|
321
|
+
logger.info("rows loaded=%s", 1200)
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
### Subclassing `ManagedResource`
|
|
325
|
+
|
|
326
|
+
`ManagedResource` supports both synchronous and asynchronous cleanup patterns, so custom resources can expose the same lifecycle contract whether they wrap filesystems, clients, sockets, or other runtime state.
|
|
327
|
+
|
|
328
|
+
#### Synchronous resource
|
|
329
|
+
|
|
330
|
+
```python
|
|
331
|
+
from boti import ManagedResource
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
class FilesystemResource(ManagedResource):
|
|
335
|
+
    def write_text(self, path: str, content: str) -> None:
|
|
336
|
+
        fs = self.require_fs()
|
|
337
|
+
        with fs.open(path, "w", encoding="utf-8") as handle:
|
|
338
|
+
            handle.write(content)
|
|
339
|
+
|
|
340
|
+
    def read_text(self, path: str) -> str:
|
|
341
|
+
        fs = self.require_fs()
|
|
342
|
+
        with fs.open(path, "r", encoding="utf-8") as handle:
|
|
343
|
+
            return handle.read()
|
|
344
|
+
|
|
345
|
+
    def _cleanup(self) -> None:
|
|
346
|
+
        if self._owns_fs and self.fs is not None:
|
|
347
|
+
            self.fs = None
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
```python
|
|
351
|
+
import fsspec
|
|
352
|
+
|
|
353
|
+
resource = FilesystemResource(fs_factory=lambda: fsspec.filesystem("memory"))
|
|
354
|
+
|
|
355
|
+
with resource:
|
|
356
|
+
    resource.write_text("memory://example.txt", "hello from fsspec")
|
|
357
|
+
    print(resource.read_text("memory://example.txt"))
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
#### Asynchronous resource
|
|
361
|
+
|
|
362
|
+
```python
|
|
363
|
+
import asyncio
|
|
364
|
+
|
|
365
|
+
from boti import ManagedResource
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
class AsyncClientResource(ManagedResource):
|
|
369
|
+
    def __init__(self, client) -> None:
|
|
370
|
+
        super().__init__()
|
|
371
|
+
        self.client = client
|
|
372
|
+
|
|
373
|
+
    async def _acleanup(self) -> None:
|
|
374
|
+
        await self.client.aclose()
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
async def main(client) -> None:
|
|
378
|
+
    async with AsyncClientResource(client) as resource:
|
|
379
|
+
        await asyncio.sleep(0)
|
|
380
|
+
```
|
|
381
|
+
|
|
382
|
+
If a subclass implements only `_cleanup()`, `await resource.aclose()` will fall back to running the synchronous cleanup safely.
|
|
383
|
+
|
|
384
|
+
## More package-specific docs
|
|
385
|
+
|
|
386
|
+
- [`packages/boti/README.md`](packages/boti/README.md)
|
|
387
|
+
- [`packages/boti-data/README.md`](packages/boti-data/README.md)
|
|
388
|
+
- [`examples/`](examples/)
|
|
389
|
+
- [`docs/`](docs/)
|
|
390
|
+
|
|
391
|
+
## Development
|
|
392
|
+
|
|
393
|
+
Run tests with the project interpreter:
|
|
394
|
+
|
|
395
|
+
```bash
|
|
396
|
+
PYTHONPATH=src python -m pytest -q
|
|
397
|
+
```
|