polars-runtime-compat 1.34.0b3-cp39-abi3-manylinux_2_24_aarch64.whl → 1.34.0b5-cp39-abi3-manylinux_2_24_aarch64.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Potentially problematic release: this version of polars-runtime-compat might be problematic.
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
- polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
polars/interchange/utils.py
DELETED
@@ -1,170 +0,0 @@
-from __future__ import annotations
-
-import re
-from typing import TYPE_CHECKING
-
-from polars.datatypes import (
-    Boolean,
-    Categorical,
-    Date,
-    Datetime,
-    Duration,
-    Enum,
-    Float32,
-    Float64,
-    Int8,
-    Int16,
-    Int32,
-    Int64,
-    String,
-    Time,
-    UInt8,
-    UInt16,
-    UInt32,
-    UInt64,
-)
-from polars.interchange.protocol import DtypeKind, Endianness
-
-if TYPE_CHECKING:
-    from polars._typing import PolarsDataType
-    from polars.datatypes import DataTypeClass
-    from polars.interchange.protocol import Dtype
-
-NE = Endianness.NATIVE
-
-polars_dtype_to_dtype_map: dict[DataTypeClass, Dtype] = {
-    Int8: (DtypeKind.INT, 8, "c", NE),
-    Int16: (DtypeKind.INT, 16, "s", NE),
-    Int32: (DtypeKind.INT, 32, "i", NE),
-    Int64: (DtypeKind.INT, 64, "l", NE),
-    UInt8: (DtypeKind.UINT, 8, "C", NE),
-    UInt16: (DtypeKind.UINT, 16, "S", NE),
-    UInt32: (DtypeKind.UINT, 32, "I", NE),
-    UInt64: (DtypeKind.UINT, 64, "L", NE),
-    Float32: (DtypeKind.FLOAT, 32, "f", NE),
-    Float64: (DtypeKind.FLOAT, 64, "g", NE),
-    Boolean: (DtypeKind.BOOL, 1, "b", NE),
-    String: (DtypeKind.STRING, 8, "U", NE),
-    Date: (DtypeKind.DATETIME, 32, "tdD", NE),
-    Time: (DtypeKind.DATETIME, 64, "ttu", NE),
-    Datetime: (DtypeKind.DATETIME, 64, "tsu:", NE),
-    Duration: (DtypeKind.DATETIME, 64, "tDu", NE),
-    Categorical: (DtypeKind.CATEGORICAL, 32, "I", NE),
-    Enum: (DtypeKind.CATEGORICAL, 32, "I", NE),
-}
-
-
-def polars_dtype_to_dtype(dtype: PolarsDataType) -> Dtype:
-    """Convert Polars data type to interchange protocol data type."""
-    try:
-        result = polars_dtype_to_dtype_map[dtype.base_type()]
-    except KeyError as exc:
-        msg = f"data type {dtype!r} not supported by the interchange protocol"
-        raise ValueError(msg) from exc
-
-    # Handle instantiated data types
-    if isinstance(dtype, Datetime):
-        return _datetime_to_dtype(dtype)
-    elif isinstance(dtype, Duration):
-        return _duration_to_dtype(dtype)
-
-    return result
-
-
-def _datetime_to_dtype(dtype: Datetime) -> Dtype:
-    tu = dtype.time_unit[0]
-    tz = dtype.time_zone if dtype.time_zone is not None else ""
-    arrow_c_type = f"ts{tu}:{tz}"
-    return DtypeKind.DATETIME, 64, arrow_c_type, NE
-
-
-def _duration_to_dtype(dtype: Duration) -> Dtype:
-    tu = dtype.time_unit[0]
-    arrow_c_type = f"tD{tu}"
-    return DtypeKind.DATETIME, 64, arrow_c_type, NE
-
-
-dtype_to_polars_dtype_map: dict[DtypeKind, dict[int, PolarsDataType]] = {
-    DtypeKind.INT: {
-        8: Int8,
-        16: Int16,
-        32: Int32,
-        64: Int64,
-    },
-    DtypeKind.UINT: {
-        8: UInt8,
-        16: UInt16,
-        32: UInt32,
-        64: UInt64,
-    },
-    DtypeKind.FLOAT: {
-        32: Float32,
-        64: Float64,
-    },
-    DtypeKind.BOOL: {
-        1: Boolean,
-        8: Boolean,
-    },
-    DtypeKind.STRING: {8: String},
-}
-
-
-def dtype_to_polars_dtype(dtype: Dtype) -> PolarsDataType:
-    """Convert interchange protocol data type to Polars data type."""
-    kind, bit_width, format_str, _ = dtype
-
-    if kind == DtypeKind.DATETIME:
-        return _temporal_dtype_to_polars_dtype(format_str, dtype)
-    elif kind == DtypeKind.CATEGORICAL:
-        return Enum
-
-    try:
-        return dtype_to_polars_dtype_map[kind][bit_width]
-    except KeyError as exc:
-        msg = f"unsupported data type: {dtype!r}"
-        raise NotImplementedError(msg) from exc
-
-
-def _temporal_dtype_to_polars_dtype(format_str: str, dtype: Dtype) -> PolarsDataType:
-    if (match := re.fullmatch(r"ts([mun]):(.*)", format_str)) is not None:
-        time_unit = match.group(1) + "s"
-        time_zone = match.group(2) or None
-        return Datetime(
-            time_unit=time_unit,  # type: ignore[arg-type]
-            time_zone=time_zone,
-        )
-    elif format_str == "tdD":
-        return Date
-    elif format_str == "ttu":
-        return Time
-    elif (match := re.fullmatch(r"tD([mun])", format_str)) is not None:
-        time_unit = match.group(1) + "s"
-        return Duration(time_unit=time_unit)  # type: ignore[arg-type]
-
-    msg = f"unsupported temporal data type: {dtype!r}"
-    raise NotImplementedError(msg)
-
-
-def get_buffer_length_in_elements(buffer_size: int, dtype: Dtype) -> int:
-    """Get the length of a buffer in elements."""
-    bits_per_element = dtype[1]
-    bytes_per_element, rest = divmod(bits_per_element, 8)
-    if rest > 0:
-        msg = f"cannot get buffer length for buffer with dtype {dtype!r}"
-        raise ValueError(msg)
-    return buffer_size // bytes_per_element
-
-
-def polars_dtype_to_data_buffer_dtype(dtype: PolarsDataType) -> PolarsDataType:
-    """Get the data type of the data buffer."""
-    if dtype.is_integer() or dtype.is_float() or dtype == Boolean:
-        return dtype
-    elif dtype.is_temporal():
-        return Int32 if dtype == Date else Int64
-    elif dtype == String:
-        return UInt8
-    elif dtype in (Enum, Categorical):
-        return UInt32
-
-    msg = f"unsupported data type: {dtype}"
-    raise NotImplementedError(msg)
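
The deleted module above was the bridge between Polars dtypes and DataFrame Interchange Protocol descriptors. A minimal sketch of the round trip it supported in 1.34.0b3 (these imports no longer resolve in 1.34.0b5):

    import polars as pl
    from polars.interchange.utils import (  # removed in 1.34.0b5
        dtype_to_polars_dtype,
        polars_dtype_to_dtype,
    )

    # Produces a (DtypeKind.DATETIME, 64, "tsu:UTC", NE) descriptor via _datetime_to_dtype
    desc = polars_dtype_to_dtype(pl.Datetime("us", "UTC"))
    # ...and the reverse mapping recovers the dtype via the ts([mun]):(.*) pattern
    assert dtype_to_polars_dtype(desc) == pl.Datetime("us", "UTC")
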
polars/io/__init__.py
DELETED
@@ -1,64 +0,0 @@
-"""Functions for reading data."""
-
-from polars.io.avro import read_avro
-from polars.io.clipboard import read_clipboard
-from polars.io.csv import read_csv, read_csv_batched, scan_csv
-from polars.io.database import read_database, read_database_uri
-from polars.io.delta import read_delta, scan_delta
-from polars.io.iceberg import scan_iceberg
-from polars.io.ipc import read_ipc, read_ipc_schema, read_ipc_stream, scan_ipc
-from polars.io.json import read_json
-from polars.io.ndjson import read_ndjson, scan_ndjson
-from polars.io.parquet import (
-    read_parquet,
-    read_parquet_metadata,
-    read_parquet_schema,
-    scan_parquet,
-)
-from polars.io.partition import (
-    BasePartitionContext,
-    KeyedPartition,
-    KeyedPartitionContext,
-    PartitionByKey,
-    PartitionMaxSize,
-    PartitionParted,
-)
-from polars.io.plugins import _defer as defer
-from polars.io.pyarrow_dataset import scan_pyarrow_dataset
-from polars.io.scan_options import ScanCastOptions
-from polars.io.spreadsheet import read_excel, read_ods
-
-__all__ = [
-    "defer",
-    "PartitionByKey",
-    "PartitionMaxSize",
-    "PartitionParted",
-    "KeyedPartition",
-    "BasePartitionContext",
-    "KeyedPartitionContext",
-    "read_avro",
-    "read_clipboard",
-    "read_csv",
-    "read_csv_batched",
-    "read_database",
-    "read_database_uri",
-    "read_delta",
-    "read_excel",
-    "read_ipc",
-    "read_ipc_schema",
-    "read_ipc_stream",
-    "read_json",
-    "read_ndjson",
-    "read_ods",
-    "read_parquet",
-    "read_parquet_metadata",
-    "read_parquet_schema",
-    "scan_csv",
-    "scan_delta",
-    "scan_iceberg",
-    "scan_ipc",
-    "scan_ndjson",
-    "scan_parquet",
-    "scan_pyarrow_dataset",
-    "ScanCastOptions",
-]
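
This `__init__.py` is what re-exported every reader and scanner under `polars.io`. A sketch of what held in 1.34.0b3, assuming the usual top-level re-export of the same names from the `polars` namespace:

    import polars as pl
    from polars.io import read_csv  # module deleted in 1.34.0b5

    # both spellings referred to the same function object
    assert read_csv is pl.read_csv
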
polars/io/_utils.py
DELETED
@@ -1,317 +0,0 @@
-from __future__ import annotations
-
-import glob
-import re
-from contextlib import contextmanager
-from io import BytesIO, StringIO
-from pathlib import Path
-from typing import IO, TYPE_CHECKING, Any, overload
-
-from polars._dependencies import _FSSPEC_AVAILABLE, fsspec
-from polars._utils.various import (
-    is_int_sequence,
-    is_str_sequence,
-    normalize_filepath,
-)
-from polars.exceptions import NoDataError
-
-if TYPE_CHECKING:
-    from collections.abc import Iterator, Sequence
-    from contextlib import AbstractContextManager as ContextManager
-
-
-def parse_columns_arg(
-    columns: Sequence[str] | Sequence[int] | str | int | None,
-) -> tuple[Sequence[int] | None, Sequence[str] | None]:
-    """
-    Parse the `columns` argument of an I/O function.
-
-    Disambiguates between column names and column indices input.
-
-    Returns
-    -------
-    tuple
-        A tuple containing the columns as a projection and a list of column names.
-        Only one will be specified, the other will be `None`.
-    """
-    if columns is None:
-        return None, None
-
-    projection: Sequence[int] | None = None
-    column_names: Sequence[str] | None = None
-
-    if isinstance(columns, str):
-        column_names = [columns]
-    elif isinstance(columns, int):
-        projection = [columns]
-    elif is_str_sequence(columns):
-        _ensure_columns_are_unique(columns)
-        column_names = columns
-    elif is_int_sequence(columns):
-        _ensure_columns_are_unique(columns)
-        projection = columns
-    else:
-        msg = "the `columns` argument should contain a list of all integers or all string values"
-        raise TypeError(msg)
-
-    return projection, column_names
-
-
-def _ensure_columns_are_unique(columns: Sequence[str] | Sequence[int]) -> None:
-    if len(columns) != len(set(columns)):
-        msg = f"`columns` arg should only have unique values, got {columns!r}"
-        raise ValueError(msg)
-
-
-def parse_row_index_args(
-    row_index_name: str | None = None,
-    row_index_offset: int = 0,
-) -> tuple[str, int] | None:
-    """
-    Parse the `row_index_name` and `row_index_offset` arguments of an I/O function.
-
-    The Rust functions take a single tuple rather than two separate arguments.
-    """
-    if row_index_name is None:
-        return None
-    else:
-        return (row_index_name, row_index_offset)
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | BytesIO]: ...
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | BytesIO]: ...
-
-
-@overload
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = ...,
-    *,
-    use_pyarrow: bool = ...,
-    raise_if_empty: bool = ...,
-    storage_options: dict[str, Any] | None = ...,
-) -> ContextManager[str | list[str] | BytesIO | list[BytesIO]]: ...
-
-
-def prepare_file_arg(
-    file: str | Path | list[str] | IO[str] | IO[bytes] | bytes,
-    encoding: str | None = None,
-    *,
-    use_pyarrow: bool = False,
-    raise_if_empty: bool = True,
-    storage_options: dict[str, Any] | None = None,
-) -> ContextManager[str | list[str] | BytesIO | list[BytesIO]]:
-    """
-    Prepare file argument.
-
-    Utility for read_[csv, parquet]. (not to be used by scan_[csv, parquet]).
-    Returned value is always usable as a context.
-
-    A `StringIO`, `BytesIO` file is returned as a `BytesIO`.
-    A local path is returned as a string.
-    An http URL is read into a buffer and returned as a `BytesIO`.
-
-    When `encoding` is not `utf8` or `utf8-lossy`, the whole file is
-    first read in Python and decoded using the specified encoding and
-    returned as a `BytesIO` (for usage with `read_csv`). If encoding
-    ends with "-lossy", characters that can't be decoded are replaced
-    with `�`.
-
-    A `bytes` file is returned as a `BytesIO` if `use_pyarrow=True`.
-
-    When fsspec is installed, remote file(s) is (are) opened with
-    `fsspec.open(file, **kwargs)` or `fsspec.open_files(file, **kwargs)`.
-    If encoding is not `utf8` or `utf8-lossy`, decoding is handled by
-    fsspec too.
-    """
-    storage_options = storage_options.copy() if storage_options else {}
-    if storage_options and not _FSSPEC_AVAILABLE:
-        msg = "`fsspec` is required for `storage_options` argument"
-        raise ImportError(msg)
-
-    # Small helper to use a variable as context
-    @contextmanager
-    def managed_file(file: Any) -> Iterator[Any]:
-        try:
-            yield file
-        finally:
-            pass
-
-    has_utf8_utf8_lossy_encoding = (
-        encoding in {"utf8", "utf8-lossy"} if encoding else True
-    )
-    encoding_str = encoding if encoding else "utf8"
-    encoding_str, encoding_errors = (
-        (encoding_str[:-6], "replace")
-        if encoding_str.endswith("-lossy")
-        else (encoding_str, "strict")
-    )
-
-    # PyArrow allows directories, so we only check that something is not
-    # a dir if we are not using PyArrow
-    check_not_dir = not use_pyarrow
-
-    if isinstance(file, bytes):
-        if not has_utf8_utf8_lossy_encoding:
-            file = file.decode(encoding_str, errors=encoding_errors).encode("utf8")
-        return _check_empty(
-            BytesIO(file), context="bytes", raise_if_empty=raise_if_empty
-        )
-
-    if isinstance(file, StringIO):
-        return _check_empty(
-            BytesIO(file.read().encode("utf8")),
-            context="StringIO",
-            read_position=file.tell(),
-            raise_if_empty=raise_if_empty,
-        )
-
-    if isinstance(file, BytesIO):
-        if not has_utf8_utf8_lossy_encoding:
-            return _check_empty(
-                BytesIO(
-                    file.read()
-                    .decode(encoding_str, errors=encoding_errors)
-                    .encode("utf8")
-                ),
-                context="BytesIO",
-                read_position=file.tell(),
-                raise_if_empty=raise_if_empty,
-            )
-        return managed_file(
-            _check_empty(
-                b=file,
-                context="BytesIO",
-                read_position=file.tell(),
-                raise_if_empty=raise_if_empty,
-            )
-        )
-
-    if isinstance(file, Path):
-        if not has_utf8_utf8_lossy_encoding:
-            return _check_empty(
-                BytesIO(
-                    file.read_bytes()
-                    .decode(encoding_str, errors=encoding_errors)
-                    .encode("utf8")
-                ),
-                context=f"Path ({file!r})",
-                raise_if_empty=raise_if_empty,
-            )
-        return managed_file(normalize_filepath(file, check_not_directory=check_not_dir))
-
-    if isinstance(file, str):
-        # make sure that this is before fsspec
-        # as fsspec needs requests to be installed
-        # to read from http
-        if looks_like_url(file):
-            return process_file_url(file, encoding_str)
-        if _FSSPEC_AVAILABLE:
-            from fsspec.utils import infer_storage_options
-
-            # check if it is a local file
-            if infer_storage_options(file)["protocol"] == "file":
-                # (lossy) utf8
-                if has_utf8_utf8_lossy_encoding:
-                    return managed_file(
-                        normalize_filepath(file, check_not_directory=check_not_dir)
-                    )
-                # decode first
-                with Path(file).open(
-                    encoding=encoding_str, errors=encoding_errors
-                ) as f:
-                    return _check_empty(
-                        BytesIO(f.read().encode("utf8")),
-                        context=f"{file!r}",
-                        raise_if_empty=raise_if_empty,
-                    )
-            storage_options["encoding"] = encoding
-            storage_options["errors"] = encoding_errors
-            return fsspec.open(file, **storage_options)
-
-    if isinstance(file, list) and bool(file) and all(isinstance(f, str) for f in file):
-        if _FSSPEC_AVAILABLE:
-            from fsspec.utils import infer_storage_options
-
-            if has_utf8_utf8_lossy_encoding:
-                if all(infer_storage_options(f)["protocol"] == "file" for f in file):
-                    return managed_file(
-                        [
-                            normalize_filepath(f, check_not_directory=check_not_dir)
-                            for f in file
-                        ]
-                    )
-            storage_options["encoding"] = encoding
-            storage_options["errors"] = encoding_errors
-            return fsspec.open_files(file, **storage_options)
-
-    if isinstance(file, str):
-        file = normalize_filepath(file, check_not_directory=check_not_dir)
-        if not has_utf8_utf8_lossy_encoding:
-            with Path(file).open(encoding=encoding_str, errors=encoding_errors) as f:
-                return _check_empty(
-                    BytesIO(f.read().encode("utf8")),
-                    context=f"{file!r}",
-                    raise_if_empty=raise_if_empty,
-                )
-
-    return managed_file(file)
-
-
-def _check_empty(
-    b: BytesIO, *, context: str, raise_if_empty: bool, read_position: int | None = None
-) -> BytesIO:
-    if raise_if_empty and b.getbuffer().nbytes == 0:
-        hint = (
-            f" (buffer position = {read_position}; try seek(0) before reading?)"
-            if context in ("StringIO", "BytesIO") and read_position
-            else ""
-        )
-        msg = f"empty data from {context}{hint}"
-        raise NoDataError(msg)
-    return b
-
-
-def looks_like_url(path: str) -> bool:
-    return re.match("^(ht|f)tps?://", path, re.IGNORECASE) is not None
-
-
-def process_file_url(path: str, encoding: str | None = None) -> BytesIO:
-    from urllib.request import urlopen
-
-    with urlopen(path) as f:
-        if not encoding or encoding in {"utf8", "utf8-lossy"}:
-            return BytesIO(f.read())
-        else:
-            return BytesIO(f.read().decode(encoding).encode("utf8"))
-
-
-def is_glob_pattern(file: str) -> bool:
-    return any(char in file for char in ["*", "?", "["])
-
-
-def is_local_file(file: str) -> bool:
-    try:
-        next(glob.iglob(file, recursive=True))  # noqa: PTH207
-    except StopIteration:
-        return False
-    else:
-        return True
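
Most of this module is plumbing for the eager readers; `parse_columns_arg` is the piece the deleted reader below relies on. A minimal sketch of its contract, taken directly from the code above:

    from polars.io._utils import parse_columns_arg  # removed in 1.34.0b5

    parse_columns_arg(["a", "b"])  # -> (None, ["a", "b"]): names, no projection
    parse_columns_arg([0, 2])      # -> ([0, 2], None): projection, no names
    parse_columns_arg(None)        # -> (None, None)
    parse_columns_arg(["a", "a"])  # raises ValueError: values must be unique
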
polars/io/avro.py
DELETED
@@ -1,49 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-from pathlib import Path
-from typing import IO, TYPE_CHECKING
-
-from polars._utils.various import normalize_filepath
-from polars._utils.wrap import wrap_df
-from polars.io._utils import parse_columns_arg
-
-with contextlib.suppress(ImportError):  # Module not available when building docs
-    from polars._plr import PyDataFrame
-
-if TYPE_CHECKING:
-    from polars import DataFrame
-
-
-def read_avro(
-    source: str | Path | IO[bytes] | bytes,
-    *,
-    columns: list[int] | list[str] | None = None,
-    n_rows: int | None = None,
-) -> DataFrame:
-    """
-    Read into a DataFrame from Apache Avro format.
-
-    Parameters
-    ----------
-    source
-        Path to a file or a file-like object (by "file-like object" we refer to objects
-        that have a `read()` method, such as a file handler like the builtin `open`
-        function, or a `BytesIO` instance). For file-like objects, the stream position
-        may not be updated accordingly after reading.
-    columns
-        Columns to select. Accepts a list of column indices (starting at zero) or a list
-        of column names.
-    n_rows
-        Stop reading from Apache Avro file after reading `n_rows`.
-
-    Returns
-    -------
-    DataFrame
-    """
-    if isinstance(source, (str, Path)):
-        source = normalize_filepath(source)
-    projection, column_names = parse_columns_arg(columns)
-
-    pydf = PyDataFrame.read_avro(source, column_names, projection, n_rows)
-    return wrap_df(pydf)
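
For reference, a usage sketch of the reader defined above, which was public as `pl.read_avro` in 1.34.0b3; the path and column names are placeholders:

    import polars as pl

    # select two columns by name and cap the row count
    df = pl.read_avro("data.avro", columns=["id", "name"], n_rows=1_000)
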
polars/io/clipboard.py
DELETED
@@ -1,36 +0,0 @@
-from __future__ import annotations
-
-import contextlib
-from io import StringIO
-from typing import TYPE_CHECKING, Any
-
-from polars.io.csv.functions import read_csv
-
-with contextlib.suppress(ImportError):
-    from polars._plr import read_clipboard_string as _read_clipboard_string
-
-if TYPE_CHECKING:
-    from polars import DataFrame
-
-
-def read_clipboard(separator: str = "\t", **kwargs: Any) -> DataFrame:
-    """
-    Read text from clipboard and pass to `read_csv`.
-
-    Useful for reading data copied from Excel or other similar spreadsheet software.
-
-    Parameters
-    ----------
-    separator
-        Single byte character to use as separator parsing csv from clipboard.
-    kwargs
-        Additional arguments passed to `read_csv`.
-
-    See Also
-    --------
-    read_csv : Read a csv file into a DataFrame.
-    DataFrame.write_clipboard : Write a DataFrame to the clipboard.
-    """
-    csv_string: str = _read_clipboard_string()
-    io_string = StringIO(csv_string)
-    return read_csv(source=io_string, separator=separator, **kwargs)
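
A usage sketch: the function reads whatever is on the clipboard as tab-separated text, and any extra keyword arguments (e.g. `has_header`) are forwarded to `read_csv`:

    import polars as pl

    # copy a range from a spreadsheet first, then:
    df = pl.read_clipboard(separator="\t", has_header=True)
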
polars/io/cloud/__init__.py
DELETED
@@ -1,17 +0,0 @@
-from polars.io.cloud.credential_provider._providers import (
-    CredentialProvider,
-    CredentialProviderAWS,
-    CredentialProviderAzure,
-    CredentialProviderFunction,
-    CredentialProviderFunctionReturn,
-    CredentialProviderGCP,
-)
-
-__all__ = [
-    "CredentialProvider",
-    "CredentialProviderAWS",
-    "CredentialProviderAzure",
-    "CredentialProviderFunction",
-    "CredentialProviderFunctionReturn",
-    "CredentialProviderGCP",
-]
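
These re-exports were the public entry point for cloud credential providers. A hedged sketch of how they were typically passed to a cloud scan in 1.34.0b3; the bucket URI and profile name are placeholders:

    import polars as pl
    from polars.io.cloud import CredentialProviderAWS  # removed in 1.34.0b5

    # resolve AWS credentials from a named profile for the scan
    lf = pl.scan_parquet(
        "s3://my-bucket/data/*.parquet",
        credential_provider=CredentialProviderAWS(profile_name="default"),
    )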