boti-data 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. boti_data-0.1.0/PKG-INFO +189 -0
  2. boti_data-0.1.0/README.md +160 -0
  3. boti_data-0.1.0/pyproject.toml +64 -0
  4. boti_data-0.1.0/setup.cfg +4 -0
  5. boti_data-0.1.0/src/boti_data/__init__.py +92 -0
  6. boti_data-0.1.0/src/boti_data/arrow_schema.py +326 -0
  7. boti_data-0.1.0/src/boti_data/connection_catalog.py +121 -0
  8. boti_data-0.1.0/src/boti_data/db/__init__.py +42 -0
  9. boti_data-0.1.0/src/boti_data/db/arrow_schema_mapper.py +331 -0
  10. boti_data-0.1.0/src/boti_data/db/engine_registry.py +113 -0
  11. boti_data-0.1.0/src/boti_data/db/partitioned_execution.py +333 -0
  12. boti_data-0.1.0/src/boti_data/db/partitioned_loader.py +169 -0
  13. boti_data-0.1.0/src/boti_data/db/partitioned_planner.py +429 -0
  14. boti_data-0.1.0/src/boti_data/db/partitioned_types.py +123 -0
  15. boti_data-0.1.0/src/boti_data/db/sql_config.py +154 -0
  16. boti_data-0.1.0/src/boti_data/db/sql_engine.py +330 -0
  17. boti_data-0.1.0/src/boti_data/db/sql_manager.py +43 -0
  18. boti_data-0.1.0/src/boti_data/db/sql_model_builder.py +79 -0
  19. boti_data-0.1.0/src/boti_data/db/sql_model_registry.py +384 -0
  20. boti_data-0.1.0/src/boti_data/db/sql_readonly.py +76 -0
  21. boti_data-0.1.0/src/boti_data/db/sql_resource.py +180 -0
  22. boti_data-0.1.0/src/boti_data/db/sqlalchemy_async.py +19 -0
  23. boti_data-0.1.0/src/boti_data/distributed.py +182 -0
  24. boti_data-0.1.0/src/boti_data/field_map.py +177 -0
  25. boti_data-0.1.0/src/boti_data/filters/__init__.py +25 -0
  26. boti_data-0.1.0/src/boti_data/filters/arrow_kernels.py +351 -0
  27. boti_data-0.1.0/src/boti_data/filters/expressions.py +149 -0
  28. boti_data-0.1.0/src/boti_data/filters/handler.py +268 -0
  29. boti_data-0.1.0/src/boti_data/filters/utils.py +449 -0
  30. boti_data-0.1.0/src/boti_data/gateway/__init__.py +13 -0
  31. boti_data-0.1.0/src/boti_data/gateway/arrow_adapters.py +337 -0
  32. boti_data-0.1.0/src/boti_data/gateway/core.py +2068 -0
  33. boti_data-0.1.0/src/boti_data/gateway/frame_strategies.py +385 -0
  34. boti_data-0.1.0/src/boti_data/gateway/loaders.py +284 -0
  35. boti_data-0.1.0/src/boti_data/gateway/normalization.py +182 -0
  36. boti_data-0.1.0/src/boti_data/gateway/requests.py +150 -0
  37. boti_data-0.1.0/src/boti_data/helper.py +199 -0
  38. boti_data-0.1.0/src/boti_data/joins.py +147 -0
  39. boti_data-0.1.0/src/boti_data/parquet/__init__.py +8 -0
  40. boti_data-0.1.0/src/boti_data/parquet/reader.py +190 -0
  41. boti_data-0.1.0/src/boti_data/parquet/resource.py +572 -0
  42. boti_data-0.1.0/src/boti_data/schema.py +245 -0
  43. boti_data-0.1.0/src/boti_data.egg-info/PKG-INFO +189 -0
  44. boti_data-0.1.0/src/boti_data.egg-info/SOURCES.txt +45 -0
  45. boti_data-0.1.0/src/boti_data.egg-info/dependency_links.txt +1 -0
  46. boti_data-0.1.0/src/boti_data.egg-info/requires.txt +10 -0
  47. boti_data-0.1.0/src/boti_data.egg-info/top_level.txt +1 -0
@@ -0,0 +1,189 @@
1
+ Metadata-Version: 2.4
2
+ Name: boti-data
3
+ Version: 0.1.0
4
+ Summary: Data infrastructure for the Boti ecosystem
5
+ Author-email: Your Name <your.email@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/your-username/boti-data
8
+ Project-URL: Repository, https://github.com/your-username/boti-data
9
+ Project-URL: Documentation, https://github.com/your-username/boti-data#readme
10
+ Project-URL: Issues, https://github.com/your-username/boti-data/issues
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
17
+ Requires-Python: >=3.13
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: asyncmy>=0.2.11
20
+ Requires-Dist: boti<0.2.0,>=0.1.0
21
+ Requires-Dist: dask[dataframe,distributed]>=2026.3.0
22
+ Requires-Dist: fsspec>=2026.3.0
23
+ Requires-Dist: pandas>=3.0.2
24
+ Requires-Dist: polars>=1.29.0
25
+ Requires-Dist: pyarrow>=23.0.1
26
+ Requires-Dist: pydantic>=2.12.5
27
+ Requires-Dist: pymysql>=1.1.2
28
+ Requires-Dist: sqlalchemy[asyncio]>=2.0.49
29
+
30
+ # boti-data
31
+
32
+ `boti-data` is the **data access and data transformation layer** of the Boti ecosystem.
33
+
34
+ It builds on top of `boti` and gives teams a reusable interface for working with structured data across databases, parquet datasets, schema-controlled transformations, and distributed or partitioned loading workflows.
35
+
36
+ ## What `boti-data` is for
37
+
38
+ Many teams have the same recurring problem: business logic depends on data that lives in multiple places, arrives in slightly different shapes, and is loaded through a mix of notebooks, scripts, ad hoc SQL, and one-off helpers.
39
+
40
+ `boti-data` helps turn that into a more coherent data access layer.
41
+
42
+ It is designed for codebases that need to:
43
+
44
+ - connect to named data sources consistently
45
+ - reflect or model database tables without hand-writing everything up front
46
+ - load data through a gateway instead of bespoke query snippets everywhere
47
+ - normalise and validate schemas before downstream use
48
+ - combine parquet and database workflows in one library
49
+ - scale from simple local reads to partitioned or distributed loading
50
+
51
+ ## Problems `boti-data` solves
52
+
53
+ `boti-data` is useful when data code is suffering from issues like:
54
+
55
+ - repeated connection boilerplate across notebooks and services
56
+ - slow, fragile query code copied from place to place
57
+ - inconsistent schema assumptions between producers and consumers
58
+ - difficult transitions from exploratory analysis to reusable pipelines
59
+ - manual join and field-mapping logic repeated in many modules
60
+ - no common abstraction for loading data from SQL and parquet sources
61
+
62
+ By centralising those patterns, `boti-data` reduces duplicated plumbing and makes transformations easier to reason about.
63
+
64
+ ## Why `boti-data` can make a huge difference
65
+
66
+ The biggest benefit of `boti-data` is that it creates a **shared data interface** between infrastructure and business logic.
67
+
68
+ That means teams can spend less time rewriting access code and more time working on actual transformations, validation rules, and downstream decisions.
69
+
70
+ It can make a major difference when:
71
+
72
+ - analysts and engineers share the same source systems
73
+ - a notebook prototype needs to become production code
74
+ - multiple data products depend on the same tables or parquet layouts
75
+ - schema drift is a recurring source of errors
76
+ - large extracts need partitioning or distributed execution
77
+ - teams want a clean boundary between connection details and transformation logic
78
+
79
+ ## Domain areas where it is especially valuable
80
+
81
+ `boti-data` is intentionally general-purpose, but it is especially strong in domains where structured operational data must be transformed into reliable analytical or decision-ready datasets.
82
+
83
+ Examples include:
84
+
85
+ - **analytics engineering**: building reusable source loaders, schema maps, and standardised transformations
86
+ - **business operations**: consolidating data from transactional systems, planning tools, and operational databases
87
+ - **finance and controlling**: reconciling structured data with explicit schema expectations and repeatable joins
88
+ - **risk, compliance, and audit**: validating input shape, tracing transformations, and standardising access patterns
89
+ - **customer and product analytics**: joining behavioural and operational datasets with less custom plumbing
90
+ - **supply chain and logistics**: unifying inventory, movement, order, and status data from several systems
91
+ - **data platform and internal tooling**: giving teams a common gateway layer instead of ad hoc connectors
92
+ - **ML feature preparation**: building reliable dataset assembly steps from SQL and parquet sources
93
+
94
+ In those settings, the gains are not just convenience. They show up as better reuse, fewer integration bugs, and faster movement from exploration to production.
95
+
96
+ ## Core capabilities
97
+
98
+ - SQL database resources
99
+ - async and sync database access helpers
100
+ - SQLAlchemy model reflection and registries
101
+ - connection catalogues
102
+ - parquet resources and readers
103
+ - gateway-style loading APIs
104
+ - filter expressions
105
+ - schema normalisation and validation helpers
106
+ - field mapping and join helpers
107
+ - partitioned and distributed data workflows
108
+
109
+ ## Installation
110
+
111
+ Install directly:
112
+
113
+ ```bash
114
+ pip install boti-data
115
+ ```
116
+
117
+ Or install through the core package extra:
118
+
119
+ ```bash
120
+ pip install "boti[data]"
121
+ ```
122
+
123
+ ## Imports
124
+
125
+ `boti-data` uses the top-level Python package `boti_data`:
126
+
127
+ ```python
128
+ from boti_data import (
129
+ ConnectionCatalog,
130
+ DataGateway,
131
+ DataHelper,
132
+ FieldMap,
133
+ ParquetDataConfig,
134
+ ParquetDataResource,
135
+ SqlAlchemyModelBuilder,
136
+ SqlDatabaseConfig,
137
+ SqlDatabaseResource,
138
+ )
139
+ ```
140
+
141
+ Lower-level modules are also available:
142
+
143
+ ```python
144
+ from boti_data.db import SqlDatabaseConfig, SqlDatabaseResource
145
+ from boti_data.gateway import DataGateway
146
+ from boti_data.parquet import ParquetDataConfig, ParquetDataResource
147
+ from boti_data.schema import validate_schema
148
+ ```
149
+
150
+ ## Examples
151
+
152
+ ### SQL resource
153
+
154
+ ```python
155
+ from boti_data import SqlDatabaseConfig, SqlDatabaseResource
156
+
157
+ config = SqlDatabaseConfig(connection_url="sqlite:///example.db", query_only=True)
158
+
159
+ with SqlDatabaseResource(config) as db:
160
+ with db.session() as session:
161
+ rows = session.execute(...)
162
+ ```
163
+
164
+ ### Gateway
165
+
166
+ ```python
167
+ from boti_data import DataGateway, SqlDatabaseConfig
168
+
169
+ gateway = DataGateway(
170
+ backend="sqlalchemy",
171
+ config=SqlDatabaseConfig(connection_url="sqlite:///example.db", query_only=True),
172
+ )
173
+ ```
174
+
175
+ ## Relationship to `boti`
176
+
177
+ `boti-data` depends on `boti`, and reuses:
178
+
179
+ - logging
180
+ - resource lifecycle
181
+ - secure I/O helpers
182
+ - project/environment utilities
183
+
184
+ If you only need the runtime primitives, install `boti`.
185
+ If you need a stronger data access and transformation layer, install `boti-data` or `boti[data]`.
186
+
187
+ ## Development & Deployment
188
+
189
+ See [docs/DEPLOYMENT.md](docs/DEPLOYMENT.md) for publishing instructions.
@@ -0,0 +1,160 @@
1
+ # boti-data
2
+
3
+ `boti-data` is the **data access and data transformation layer** of the Boti ecosystem.
4
+
5
+ It builds on top of `boti` and gives teams a reusable interface for working with structured data across databases, parquet datasets, schema-controlled transformations, and distributed or partitioned loading workflows.
6
+
7
+ ## What `boti-data` is for
8
+
9
+ Many teams have the same recurring problem: business logic depends on data that lives in multiple places, arrives in slightly different shapes, and is loaded through a mix of notebooks, scripts, ad hoc SQL, and one-off helpers.
10
+
11
+ `boti-data` helps turn that into a more coherent data access layer.
12
+
13
+ It is designed for codebases that need to:
14
+
15
+ - connect to named data sources consistently
16
+ - reflect or model database tables without hand-writing everything up front
17
+ - load data through a gateway instead of bespoke query snippets everywhere
18
+ - normalise and validate schemas before downstream use
19
+ - combine parquet and database workflows in one library
20
+ - scale from simple local reads to partitioned or distributed loading
21
+
22
+ ## Problems `boti-data` solves
23
+
24
+ `boti-data` is useful when data code is suffering from issues like:
25
+
26
+ - repeated connection boilerplate across notebooks and services
27
+ - slow, fragile query code copied from place to place
28
+ - inconsistent schema assumptions between producers and consumers
29
+ - difficult transitions from exploratory analysis to reusable pipelines
30
+ - manual join and field-mapping logic repeated in many modules
31
+ - no common abstraction for loading data from SQL and parquet sources
32
+
33
+ By centralising those patterns, `boti-data` reduces duplicated plumbing and makes transformations easier to reason about.
34
+
35
+ ## Why `boti-data` can make a huge difference
36
+
37
+ The biggest benefit of `boti-data` is that it creates a **shared data interface** between infrastructure and business logic.
38
+
39
+ That means teams can spend less time rewriting access code and more time working on actual transformations, validation rules, and downstream decisions.
40
+
41
+ It can make a major difference when:
42
+
43
+ - analysts and engineers share the same source systems
44
+ - a notebook prototype needs to become production code
45
+ - multiple data products depend on the same tables or parquet layouts
46
+ - schema drift is a recurring source of errors
47
+ - large extracts need partitioning or distributed execution
48
+ - teams want a clean boundary between connection details and transformation logic
49
+
50
+ ## Domain areas where it is especially valuable
51
+
52
+ `boti-data` is intentionally general-purpose, but it is especially strong in domains where structured operational data must be transformed into reliable analytical or decision-ready datasets.
53
+
54
+ Examples include:
55
+
56
+ - **analytics engineering**: building reusable source loaders, schema maps, and standardised transformations
57
+ - **business operations**: consolidating data from transactional systems, planning tools, and operational databases
58
+ - **finance and controlling**: reconciling structured data with explicit schema expectations and repeatable joins
59
+ - **risk, compliance, and audit**: validating input shape, tracing transformations, and standardising access patterns
60
+ - **customer and product analytics**: joining behavioural and operational datasets with less custom plumbing
61
+ - **supply chain and logistics**: unifying inventory, movement, order, and status data from several systems
62
+ - **data platform and internal tooling**: giving teams a common gateway layer instead of ad hoc connectors
63
+ - **ML feature preparation**: building reliable dataset assembly steps from SQL and parquet sources
64
+
65
+ In those settings, the gains are not just convenience. They show up as better reuse, fewer integration bugs, and faster movement from exploration to production.
66
+
67
+ ## Core capabilities
68
+
69
+ - SQL database resources
70
+ - async and sync database access helpers
71
+ - SQLAlchemy model reflection and registries
72
+ - connection catalogues
73
+ - parquet resources and readers
74
+ - gateway-style loading APIs
75
+ - filter expressions
76
+ - schema normalisation and validation helpers
77
+ - field mapping and join helpers
78
+ - partitioned and distributed data workflows
79
+
80
+ ## Installation
81
+
82
+ Install directly:
83
+
84
+ ```bash
85
+ pip install boti-data
86
+ ```
87
+
88
+ Or install through the core package extra:
89
+
90
+ ```bash
91
+ pip install "boti[data]"
92
+ ```
93
+
94
+ ## Imports
95
+
96
+ `boti-data` uses the top-level Python package `boti_data`:
97
+
98
+ ```python
99
+ from boti_data import (
100
+ ConnectionCatalog,
101
+ DataGateway,
102
+ DataHelper,
103
+ FieldMap,
104
+ ParquetDataConfig,
105
+ ParquetDataResource,
106
+ SqlAlchemyModelBuilder,
107
+ SqlDatabaseConfig,
108
+ SqlDatabaseResource,
109
+ )
110
+ ```
111
+
112
+ Lower-level modules are also available:
113
+
114
+ ```python
115
+ from boti_data.db import SqlDatabaseConfig, SqlDatabaseResource
116
+ from boti_data.gateway import DataGateway
117
+ from boti_data.parquet import ParquetDataConfig, ParquetDataResource
118
+ from boti_data.schema import validate_schema
119
+ ```
120
+
121
+ ## Examples
122
+
123
+ ### SQL resource
124
+
125
+ ```python
126
+ from boti_data import SqlDatabaseConfig, SqlDatabaseResource
127
+
128
+ config = SqlDatabaseConfig(connection_url="sqlite:///example.db", query_only=True)
129
+
130
+ with SqlDatabaseResource(config) as db:
131
+ with db.session() as session:
132
+ rows = session.execute(...)
133
+ ```
134
+
135
+ ### Gateway
136
+
137
+ ```python
138
+ from boti_data import DataGateway, SqlDatabaseConfig
139
+
140
+ gateway = DataGateway(
141
+ backend="sqlalchemy",
142
+ config=SqlDatabaseConfig(connection_url="sqlite:///example.db", query_only=True),
143
+ )
144
+ ```
145
+
146
+ ## Relationship to `boti`
147
+
148
+ `boti-data` depends on `boti`, and reuses:
149
+
150
+ - logging
151
+ - resource lifecycle
152
+ - secure I/O helpers
153
+ - project/environment utilities
154
+
155
+ If you only need the runtime primitives, install `boti`.
156
+ If you need a stronger data access and transformation layer, install `boti-data` or `boti[data]`.
157
+
158
+ ## Development & Deployment
159
+
160
+ See [docs/DEPLOYMENT.md](docs/DEPLOYMENT.md) for publishing instructions.
@@ -0,0 +1,64 @@
1
+ [project]
2
+ name = "boti-data"
3
+ version = "0.1.0"
4
+ description = "Data infrastructure for the Boti ecosystem"
5
+ readme = "README.md"
6
+ requires-python = ">=3.13"
7
+ license = {text = "MIT"}
8
+ authors = [
9
+ {name = "Your Name", email = "your.email@example.com"}
10
+ ]
11
+ classifiers = [
12
+ "Development Status :: 3 - Alpha",
13
+ "Intended Audience :: Developers",
14
+ "License :: OSI Approved :: MIT License",
15
+ "Programming Language :: Python :: 3",
16
+ "Programming Language :: Python :: 3.13",
17
+ "Topic :: Software Development :: Libraries :: Python Modules",
18
+ ]
19
+ dependencies = [
20
+ "asyncmy>=0.2.11",
21
+ "boti>=0.1.0,<0.2.0",
22
+ "dask[dataframe,distributed]>=2026.3.0",
23
+ "fsspec>=2026.3.0",
24
+ "pandas>=3.0.2",
25
+ "polars>=1.29.0",
26
+ "pyarrow>=23.0.1",
27
+ "pydantic>=2.12.5",
28
+ "pymysql>=1.1.2",
29
+ "sqlalchemy[asyncio]>=2.0.49",
30
+ ]
31
+
32
+ [project.urls]
33
+ Homepage = "https://github.com/your-username/boti-data"
34
+ Repository = "https://github.com/your-username/boti-data"
35
+ Documentation = "https://github.com/your-username/boti-data#readme"
36
+ Issues = "https://github.com/your-username/boti-data/issues"
37
+
38
+ [build-system]
39
+ requires = ["setuptools>=80", "wheel"]
40
+ build-backend = "setuptools.build_meta"
41
+
42
+ [dependency-groups]
43
+ dev = [
44
+ "pytest>=9.0.3",
45
+ "pytest-asyncio>=1.3.0",
46
+ ]
47
+
48
+ [tool.uv]
49
+ publish.token = {env = "UV_PUBLISH_TOKEN"}
50
+
51
+ [tool.pytest.ini_options]
52
+ markers = [
53
+ "security_regression: focused regression coverage for security fixes and audit findings",
54
+ ]
55
+
56
+ [tool.setuptools]
57
+ include-package-data = false
58
+
59
+ [tool.setuptools.package-dir]
60
+ "" = "src"
61
+
62
+ [tool.setuptools.packages.find]
63
+ where = ["src"]
64
+ include = ["boti_data*"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,92 @@
"""
Data modules and interfaces for the Boti pipeline context.

This module re-exports the public API of the ``boti_data`` subpackages
(``db``, ``filters``, ``gateway``, ``parquet``, ``schema``, and the
top-level helper modules) so callers can import everything from the
top-level package::

    from boti_data import DataGateway, SqlDatabaseConfig
"""

# Imports are grouped per source module and sorted alphabetically by
# module path (PEP 8); ``boti_data.gateway`` is imported exactly once.
from boti_data.connection_catalog import ConnectionCatalog
from boti_data.db import (
    AsyncSqlDatabaseResource,
    BuilderConfig,
    DefaultBase,
    EngineRegistry,
    RegistryConfig,
    SqlAlchemyModelBuilder,
    SqlDatabaseConfig,
    SqlDatabaseResource,
    SqlModelRegistry,
    SqlPartitionPlan,
    SqlPartitionSpec,
    SqlPartitionedLoadRequest,
    SqlPartitionedLoader,
    ensure_greenlet_available,
    get_global_registry,
)
from boti_data.distributed import DaskSession, dask_session
from boti_data.field_map import FieldMap
from boti_data.filters import And, Expr, FilterHandler, Not, Or, TrueExpr
from boti_data.gateway import (
    DataFrameOptions,
    DataFrameParams,
    DataGateway,
    ParquetLoadRequest,
    SqlLoadRequest,
)
from boti_data.helper import DataHelper
from boti_data.joins import indexed_left_join, left_join_frames
from boti_data.parquet import ParquetDataConfig, ParquetDataResource, ParquetReader
from boti_data.schema import (
    SchemaValidationError,
    align_frames_for_join,
    apply_schema_map,
    infer_schema_map,
    normalize_dtype_alias,
    normalize_schema_map,
    validate_schema,
)

# Public API of the package, sorted with plain string ordering
# (uppercase names before lowercase names).
__all__ = [
    "And",
    "AsyncSqlDatabaseResource",
    "BuilderConfig",
    "ConnectionCatalog",
    "DaskSession",
    "DataFrameOptions",
    "DataFrameParams",
    "DataGateway",
    "DataHelper",
    "DefaultBase",
    "EngineRegistry",
    "Expr",
    "FieldMap",
    "FilterHandler",
    "Not",
    "Or",
    "ParquetDataConfig",
    "ParquetDataResource",
    "ParquetLoadRequest",
    "ParquetReader",
    "RegistryConfig",
    "SchemaValidationError",
    "SqlAlchemyModelBuilder",
    "SqlDatabaseConfig",
    "SqlDatabaseResource",
    "SqlLoadRequest",
    "SqlModelRegistry",
    "SqlPartitionPlan",
    "SqlPartitionSpec",
    "SqlPartitionedLoadRequest",
    "SqlPartitionedLoader",
    "TrueExpr",
    "align_frames_for_join",
    "apply_schema_map",
    "dask_session",
    "ensure_greenlet_available",
    "get_global_registry",
    "indexed_left_join",
    "infer_schema_map",
    "left_join_frames",
    "normalize_dtype_alias",
    "normalize_schema_map",
    "validate_schema",
]