PyPI - exotools - Versions diffs - 0.0.1__py3-none-any.whl - Mend

exotools 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (48) hide show

exotools/__init__.py +47 -0
exotools/datasets/__init__.py +13 -0
exotools/datasets/_exoplanet_dataset_reducer.py +96 -0
exotools/datasets/candidate_exoplanets.py +44 -0
exotools/datasets/gaia_parameters.py +68 -0
exotools/datasets/known_exoplanets.py +124 -0
exotools/datasets/lightcurves.py +58 -0
exotools/datasets/tess.py +91 -0
exotools/db/__init__.py +21 -0
exotools/db/base_db.py +74 -0
exotools/db/exo_db.py +113 -0
exotools/db/gaia_db.py +62 -0
exotools/db/lightcurve_db.py +112 -0
exotools/db/lightcurve_plus.py +196 -0
exotools/db/star_system/__init__.py +14 -0
exotools/db/star_system/planet.py +90 -0
exotools/db/star_system/star.py +26 -0
exotools/db/star_system/star_system.py +87 -0
exotools/db/star_system/uncertain_data.py +29 -0
exotools/db/starsystem_db.py +38 -0
exotools/db/tic_db.py +23 -0
exotools/db/toi_db.py +13 -0
exotools/db/urls_db.py +36 -0
exotools/downloaders/__init__.py +22 -0
exotools/downloaders/_utils.py +22 -0
exotools/downloaders/dataset_downloader.py +98 -0
exotools/downloaders/exoplanets_downloader.py +103 -0
exotools/downloaders/gaia_downloader.py +135 -0
exotools/downloaders/lightcurve_downloader.py +107 -0
exotools/downloaders/tap_service.py +84 -0
exotools/downloaders/tess_catalog_downloader.py +155 -0
exotools/downloaders/tess_observations_downloader.py +61 -0
exotools/downloaders/toi_exoplanets_downloader.py +52 -0
exotools/io/__init__.py +11 -0
exotools/io/base_storage.py +33 -0
exotools/io/fs_storage.py +158 -0
exotools/io/hdf5_storage.py +164 -0
exotools/py.typed +0 -0
exotools/utils/__init__.py +6 -0
exotools/utils/download.py +7 -0
exotools/utils/observations_fix.py +57 -0
exotools/utils/qtable_utils.py +55 -0
exotools/utils/unit_mapper.py +67 -0
exotools-0.0.1.dist-info/METADATA +125 -0
exotools-0.0.1.dist-info/RECORD +48 -0
exotools-0.0.1.dist-info/WHEEL +5 -0
exotools-0.0.1.dist-info/licenses/LICENSE +21 -0
exotools-0.0.1.dist-info/top_level.txt +1 -0

exotools/__init__.py ADDED Viewed

@@ -0,0 +1,47 @@
+"""ExoTools - Tools for working with exoplanet data."""
+from importlib.metadata import PackageNotFoundError as _PackageNotFoundError
+from importlib.metadata import version as _distribution_version
+try:
+    # Single source of truth: the version recorded in the installed distribution metadata,
+    # so the module attribute can no longer drift from the published package version.
+    __version__ = _distribution_version("exotools")
+except _PackageNotFoundError:
+    # Imported from a source tree that was never installed; fall back to this release's version.
+    __version__ = "0.0.1"
+from .db.star_system import Star, Planet, StarSystem, UncertainValue, UncertainDataSource
+from exotools.datasets.known_exoplanets import KnownExoplanetsDataset
+from exotools.datasets.candidate_exoplanets import CandidateExoplanetsDataset
+from exotools.datasets.tess import TessDataset
+from exotools.datasets.lightcurves import LightcurveDataset
+from exotools.datasets.gaia_parameters import GaiaParametersDataset
+from .db import (
+    CandidateDB,
+    ExoDB,
+    GaiaDB,
+    StarSystemDB,
+    LightcurveDB,
+    LightCurvePlus,
+    TessMetaDB,
+    TicDB,
+)
+from .utils.download import DownloadParams
+# Public API of the top-level package.
+__all__ = [
+    # Main dataset classes
+    "KnownExoplanetsDataset",
+    "CandidateExoplanetsDataset",
+    "TessDataset",
+    "LightcurveDataset",
+    # Exported by exotools.datasets as well; re-exported here so both entry points agree
+    "GaiaParametersDataset",
+    # Database classes
+    "CandidateDB",
+    "ExoDB",
+    "GaiaDB",
+    "StarSystemDB",
+    "LightcurveDB",
+    "LightCurvePlus",
+    "TessMetaDB",
+    "TicDB",
+    # Star system types
+    "Star",
+    "Planet",
+    "StarSystem",
+    "UncertainValue",
+    "UncertainDataSource",
+    # Utility types
+    "DownloadParams",
+]

exotools/datasets/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+"""Dataset classes for exotools."""
+from exotools.datasets.candidate_exoplanets import CandidateExoplanetsDataset
+from exotools.datasets.known_exoplanets import KnownExoplanetsDataset
+from exotools.datasets.tess import TessDataset
+from exotools.datasets.lightcurves import LightcurveDataset
+from exotools.datasets.gaia_parameters import GaiaParametersDataset
+# Names re-exported as the public API of exotools.datasets.
+__all__ = [
+    "CandidateExoplanetsDataset",
+    "KnownExoplanetsDataset",
+    "TessDataset",
+    "LightcurveDataset",
+    "GaiaParametersDataset",
+]

exotools/datasets/_exoplanet_dataset_reducer.py ADDED Viewed

@@ -0,0 +1,96 @@
+import numpy as np
+import pandas as pd
+from astropy.table import QTable
+from exotools.db import ExoDB
+from exotools.utils.qtable_utils import get_empty_table_header, TableColumnInfo, QTableHeader
+def _get_subset_df(table: QTable) -> pd.DataFrame:
+    """
+    Extract the columns used by the reduction step and return them as a pandas DataFrame.
+    The base columns are always selected. For every base column that has a ``<name>_lower`` column in
+    ``table``, the ``<name>_lower`` / ``<name>_upper`` pair is selected too (the ``_upper`` column is
+    assumed to exist whenever the ``_lower`` one does).
+    Parameters:
+        - table: the exoplanet table to read from
+    Returns:
+        A DataFrame with the base columns first, followed by the bound columns.
+    """
+    fields = [
+        # Dataset bookkeeping
+        "tic_id",
+        "gaia_id",
+        "disc_telescope",
+        "rowupdate",
+        # Host star
+        "hostname",
+        "hostname_lowercase",
+        "st_rad",
+        "st_rad_gaia",
+        "st_mass",
+        # Planet
+        "pl_name",
+        "pl_rade",
+        "pl_masse",
+        "pl_dens",
+        "pl_orbeccen",
+        "pl_orbper",
+        "pl_orblper",
+        "pl_orbincl",
+        "pl_orbsmax",
+        "pl_tranmid",
+        "pl_trandur",
+        "pl_trandep",
+        "pl_imppar",
+        "pl_ratror",
+        "pl_ratdor",
+    ]
+    available = table.colnames
+    # Lower bound first, then upper bound, field by field.
+    bound_columns = [
+        f"{name}{suffix}" for name in fields if f"{name}_lower" in available for suffix in ("_lower", "_upper")
+    ]
+    return table[fields + bound_columns].to_pandas()
+def _reduce_group(group: pd.DataFrame) -> pd.Series:
+    """
+    For each column, select the first element that is not null.
+    Parameters:
+        - group: a dataframe grouped by planet name, and sorted by "rowupdate" in descending order
+    Returns:
+        A Series indexed by column name holding the first non-null value of each column, or NaN for
+        columns that contain only nulls.
+    """
+    def _first_non_null(column: pd.Series):
+        # Drop nulls once per column instead of once for the emptiness test and once for the lookup.
+        valid = column.dropna()
+        return np.nan if valid.empty else valid.iloc[0]
+    return group.apply(_first_non_null)
+def _reduce_df(table: QTable) -> pd.DataFrame:
+    """
+    Collapse planets that appear on several rows into a single row each.
+    Rows are ordered by "rowupdate" (newest first) and grouped by "pl_name"; within each group every
+    column takes its first non-null value, i.e. the most recently updated value that is available.
+    Parameters:
+        - table: the exoplanet table to reduce
+    Returns:
+        A DataFrame with one row per planet name, with "pl_name" restored as a regular column.
+    """
+    newest_first = _get_subset_df(table).sort_values("rowupdate", ascending=False)
+    # include_groups=False keeps the grouping column out of the frames handed to _reduce_group.
+    per_planet = newest_first.groupby("pl_name", as_index=True).apply(_reduce_group, include_groups=False)
+    return per_planet.reset_index(drop=False)
+def _flag_invalid_planets(dataset: pd.DataFrame):
+    """
+    Add a boolean "pl_valid_flag" column to ``dataset`` in place.
+    The flag is True for rows where every mandatory parameter is present and False as soon as one of
+    them is null. Nothing is returned.
+    """
+    # Without these we can't fit the transits
+    required = ["pl_rade", "pl_trandur", "pl_tranmid", "pl_orbsmax"]
+    all_present = dataset[required].notna().all(axis=1)
+    dataset["pl_valid_flag"] = all_present
+def reduce_exoplanet_dataset(exo_db: ExoDB) -> tuple[QTable, QTableHeader]:
+    """
+    Post-process the known exoplanets dataset: keep the transiting planets, collapse the multiple
+    entries of each planet into one row, and flag the planets that have all the mandatory parameters.
+    No Gaia-based imputation happens in this function; any imputation must already be present in
+    ``exo_db``.
+    Parameters:
+        - exo_db: the full known-exoplanets database
+    Returns:
+        The reduced table (with the units of the matching source columns) and its header.
+    """
+    # NOTE(review): the previous comment said "only Tess and Kepler", but kepler_or_tess_only=False is
+    # what is actually passed - confirm which of the two is intended.
+    transiting_db = exo_db.get_transiting_planets(kepler_or_tess_only=False)
+    # One row per planet, plus the validity flag
+    reduced_df = _reduce_df(transiting_db.dataset_copy)
+    _flag_invalid_planets(reduced_df)
+    # Carry the units over from the source table, then convert back to a QTable
+    source_table = transiting_db.view
+    units_map = {name: source_table[name].unit for name in reduced_df.columns if name in source_table.colnames}
+    reduced_table = QTable.from_pandas(reduced_df, units=units_map)
+    # Build the header and describe the column that was added here
+    reduced_header = get_empty_table_header(reduced_table)
+    flag_info = TableColumnInfo(
+        unit=None,
+        description="True if the planet has all the parameters to determine transit events",
+    )
+    reduced_header["pl_valid_flag"] = flag_info
+    return reduced_table, reduced_header

exotools/datasets/candidate_exoplanets.py ADDED Viewed

@@ -0,0 +1,44 @@
+from typing import Optional
+from astropy.table import QTable
+from exotools.db import ExoDB, CandidateDB
+from exotools.downloaders import CandidateExoplanetsDownloader
+from exotools.io import BaseStorage
+class CandidateExoplanetsDataset:
+    """
+    Download, store and load the candidate exoplanets table.
+    The table is persisted through the given storage under the name held in
+    ``_DATASET_NAME_CANDIDATES``.
+    """
+    _DATASET_NAME_CANDIDATES = "candidate_exoplanets"
+    def __init__(self, storage: BaseStorage):
+        """
+        Args:
+            storage: Backend used to read and write the candidate table.
+        """
+        self._storage = storage
+    def load_candidate_exoplanets_dataset(self) -> Optional[CandidateDB]:
+        """
+        Load the stored candidate exoplanets table.
+        Returns:
+            CandidateDB: Database built from the stored table, or None (after printing a hint) when
+            reading the table raises ValueError.
+        """
+        try:
+            candidate_qtable = self._storage.read_qtable(table_name=self._DATASET_NAME_CANDIDATES)
+        except ValueError:
+            print(
+                "Candidate Exoplanets dataset not found. "
+                "You need to download it first by calling download_candidate_exoplanets(store=True)."
+            )
+            return None
+        return _create_candidate_db(candidate_dataset=candidate_qtable)
+    def download_candidate_exoplanets(self, limit: Optional[int] = None, store: bool = True) -> CandidateDB:
+        """
+        Download the candidate exoplanets table.
+        Args:
+            limit: Forwarded to the downloader; None requests no explicit limit.
+            store: Whether to persist the downloaded table.
+        Returns:
+            CandidateDB: Database built from the downloaded table.
+        """
+        print("Preparing to download candidate exoplanets dataset...")
+        candidate_qtable, candidate_header = CandidateExoplanetsDownloader().download(limit=limit)
+        if store:
+            # override=True matches every other dataset writer in this package, so a repeated
+            # download replaces the previously stored table.
+            self._storage.write_qtable(
+                table=candidate_qtable,
+                header=candidate_header,
+                table_name=self._DATASET_NAME_CANDIDATES,
+                override=True,
+            )
+        return _create_candidate_db(candidate_qtable)
+def _create_candidate_db(candidate_dataset: QTable) -> CandidateDB:
+    """
+    Build a CandidateDB from a candidate exoplanets table.
+    ExoDB.compute_bounds is applied to the table before it is wrapped; its return value is not used, so
+    it presumably updates ``candidate_dataset`` in place (confirm against ExoDB).
+    """
+    ExoDB.compute_bounds(candidate_dataset)
+    return CandidateDB(candidate_dataset)

exotools/datasets/gaia_parameters.py ADDED Viewed

@@ -0,0 +1,68 @@
+from typing import Optional, Sequence
+from astropy.table import QTable
+from exotools.db import GaiaDB
+from exotools.downloaders import GaiaDownloader
+from exotools.io import BaseStorage
+class GaiaParametersDataset:
+    """
+    Download, store and load Gaia parameters.
+    The table is persisted through the given storage under the name held in ``_DATASET_GAIA``.
+    """
+    _DATASET_GAIA = "known_gaia_astro_parameters"
+    def __init__(self, storage: BaseStorage):
+        """
+        Args:
+            storage: Backend used to read and write the Gaia table.
+        """
+        self._storage = storage
+    def load_gaia_parameters_dataset(self) -> Optional[GaiaDB]:
+        """
+        Load Gaia parameters dataset from storage.
+        Returns:
+            GaiaDB: Database containing Gaia parameters, or None if not found.
+        """
+        # Only the read is guarded: a ValueError raised while processing the table is a real error
+        # and must not be reported as "dataset not found".
+        try:
+            gaia_qtable = self._storage.read_qtable(table_name=self._DATASET_GAIA)
+        except ValueError:
+            print("Gaia dataset not found. You need to download it first by calling download_gaia_parameters().")
+            return None
+        return self._create_gaia_db(gaia_qtable)
+    def download_gaia_parameters(self, gaia_ids: Sequence[int], store: bool = True) -> GaiaDB:
+        """
+        Download Gaia DR3 data for the given Gaia IDs.
+        Args:
+            gaia_ids: Sequence of Gaia IDs to download data for.
+            store: Whether to store the downloaded data.
+        Returns:
+            GaiaDB: Database containing the downloaded Gaia parameters.
+        """
+        print(f"Preparing to download Gaia DR3 data for {len(gaia_ids)} stars...")
+        gaia_qtable, gaia_header = GaiaDownloader().download_by_id(ids=gaia_ids)
+        if store:
+            self._storage.write_qtable(
+                table=gaia_qtable,
+                header=gaia_header,
+                table_name=self._DATASET_GAIA,
+                override=True,
+            )
+        return self._create_gaia_db(gaia_qtable)
+    @staticmethod
+    def _create_gaia_db(gaia_dataset: QTable) -> GaiaDB:
+        """
+        Create a GaiaDB instance from a QTable dataset.
+        Args:
+            gaia_dataset: QTable containing Gaia data.
+        Returns:
+            GaiaDB: Database containing processed Gaia parameters.
+        """
+        GaiaDB.impute_radius(gaia_dataset)
+        GaiaDB.compute_mean_temperature(gaia_dataset)
+        GaiaDB.compute_habitable_zone(gaia_dataset)
+        return GaiaDB(gaia_dataset)

exotools/datasets/known_exoplanets.py ADDED Viewed

@@ -0,0 +1,124 @@
+from typing import Optional
+import numpy as np
+from astropy.table import QTable
+from exotools.datasets.gaia_parameters import GaiaParametersDataset
+from exotools.db import ExoDB, GaiaDB, StarSystemDB
+from exotools.downloaders import KnownExoplanetsDownloader
+from exotools.io import BaseStorage
+from ._exoplanet_dataset_reducer import reduce_exoplanet_dataset
+class KnownExoplanetsDataset:
+    """
+    Download, store and load the known exoplanets table and the star-system view derived from it.
+    Gaia star data is handled by an internal GaiaParametersDataset that shares the same storage.
+    """
+    _DATASET_EXO = "known_exoplanets"
+    _DATASET_EXO_REDUCED = "known_exoplanets_reduced"
+    # Hints printed when a stored table is missing; shared by the load methods.
+    _MISSING_GAIA_HINT = (
+        "Gaia dataset not found. You need to download it first by "
+        "calling download_known_exoplanets(with_gaia_star_data=True, store=True)."
+    )
+    _MISSING_EXO_HINT = (
+        "Known Exoplanets dataset not found. "
+        "You need to download it first by calling download_known_exoplanets(store=True)."
+    )
+    def __init__(self, storage: BaseStorage):
+        """
+        Args:
+            storage: Backend used to read and write the exoplanet and Gaia tables.
+        """
+        self._storage = storage
+        self._gaia_dataset = GaiaParametersDataset(storage)
+    def load_known_exoplanets_dataset(self, with_gaia_star_data: bool = True) -> Optional[ExoDB]:
+        """
+        Load the stored known exoplanets table.
+        Args:
+            with_gaia_star_data: When True, the stored Gaia table is loaded too and passed on to the
+                ExoDB construction.
+        Returns:
+            ExoDB: Database built from the stored table, or None (after printing a hint) when a
+            required table is not found.
+        """
+        gaia_db = None
+        if with_gaia_star_data:
+            gaia_db = self._gaia_dataset.load_gaia_parameters_dataset()
+            if gaia_db is None:
+                print(self._MISSING_GAIA_HINT)
+                return None
+        try:
+            exo_qtable = self._storage.read_qtable(table_name=self._DATASET_EXO)
+        except ValueError:
+            print(self._MISSING_EXO_HINT)
+            return None
+        return _create_exo_db(exo_dataset=exo_qtable, gaia_db=gaia_db)
+    def load_star_system_dataset(self) -> Optional[StarSystemDB]:
+        """
+        Load the star-system database, computing and storing the reduced table when it is missing.
+        Returns:
+            StarSystemDB: Database built from the reduced table, or None (after printing a hint) when
+            the reduced table is missing and the tables needed to rebuild it are not found either.
+        """
+        # Only the read is guarded: a ValueError raised while building the database from an existing
+        # reduced table is a real error and must not silently trigger a rebuild.
+        try:
+            reduced_exo_dataset = self._storage.read_qtable(table_name=self._DATASET_EXO_REDUCED)
+        except ValueError:
+            # No stored reduction yet: fall through and compute it from the full datasets.
+            pass
+        else:
+            return _create_star_system_db(reduced_exo_dataset)
+        gaia_db = self._gaia_dataset.load_gaia_parameters_dataset()
+        if gaia_db is None:
+            print(self._MISSING_GAIA_HINT)
+            return None
+        try:
+            exo_qtable = self._storage.read_qtable(table_name=self._DATASET_EXO)
+        except ValueError:
+            print(self._MISSING_EXO_HINT)
+            return None
+        return self._create_star_system_db_from_scratch(exo_dataset=exo_qtable, gaia_db=gaia_db)
+    def download_known_exoplanets(
+        self,
+        limit: Optional[int] = None,
+        with_gaia_star_data: bool = False,
+        store: bool = True,
+    ) -> ExoDB:
+        """
+        Download the known exoplanets table, optionally together with the Gaia data of its stars.
+        Args:
+            limit: Forwarded to the downloader; None requests no explicit limit.
+            with_gaia_star_data: When True, Gaia data is downloaded for the unique values of the
+                "gaia_id" column.
+            store: Whether to persist the downloaded tables.
+        Returns:
+            ExoDB: Database built from the downloaded table.
+        """
+        print("Preparing to download known exoplanets dataset...")
+        exo_qtable, exo_header = KnownExoplanetsDownloader().download(limit=limit)
+        if store:
+            self._storage.write_qtable(table=exo_qtable, header=exo_header, table_name=self._DATASET_EXO, override=True)
+        if with_gaia_star_data:
+            gaia_ids = np.unique(exo_qtable["gaia_id"].value).tolist()
+            gaia_db = self._gaia_dataset.download_gaia_parameters(gaia_ids=gaia_ids, store=store)
+        else:
+            gaia_db = None
+        return _create_exo_db(exo_qtable, gaia_db)
+    def _create_star_system_db_from_scratch(self, exo_dataset: QTable, gaia_db: GaiaDB) -> StarSystemDB:
+        """
+        Compute the reduced table from the full datasets, store it, and wrap it in a StarSystemDB.
+        """
+        # Disable parsing Time columns; we need them as Quantities to copy units to the transiting qtable.
+        exo_db = _create_exo_db(exo_dataset=exo_dataset, gaia_db=gaia_db, convert_time_columns=False)
+        # Reduce exoplanet dataset to a compact representation
+        reduced_exo_dataset, header = reduce_exoplanet_dataset(exo_db=exo_db)
+        # Store the reduced dataset for future use
+        self._storage.write_qtable(
+            table=reduced_exo_dataset,
+            header=header,
+            table_name=self._DATASET_EXO_REDUCED,
+            override=True,
+        )
+        return _create_star_system_db(reduced_exo_dataset)
+def _create_exo_db(exo_dataset: QTable, gaia_db: Optional[GaiaDB] = None, convert_time_columns: bool = True) -> ExoDB:
+    """
+    Build an ExoDB from a known-exoplanets table.
+    The table is run through ExoDB.preprocess_dataset and ExoDB.compute_bounds, then optionally through
+    ExoDB.convert_time_columns and ExoDB.impute_stellar_parameters, before being wrapped. None of the
+    return values are used, so these steps presumably modify ``exo_dataset`` in place (confirm against
+    ExoDB).
+    Parameters:
+        - exo_dataset: the known-exoplanets table
+        - gaia_db: optional Gaia database whose view is used to impute stellar parameters
+        - convert_time_columns: whether to run ExoDB.convert_time_columns on the table
+    """
+    ExoDB.preprocess_dataset(exo_dataset)
+    ExoDB.compute_bounds(exo_dataset)
+    # It's useful to disable parsing Time columns if we need them as Quantities,
+    # for example to copy units to another qtable.
+    if convert_time_columns:
+        ExoDB.convert_time_columns(exo_dataset)
+    # NOTE(review): this is a truthiness test, not "is not None". If GaiaDB inherits BaseDB.__len__, an
+    # empty GaiaDB is skipped exactly like None - confirm that is intended.
+    if gaia_db:
+        ExoDB.impute_stellar_parameters(exo_dataset, gaia_db.view)
+    return ExoDB(exo_dataset)
+def _create_star_system_db(reduced_exo_dataset: QTable) -> StarSystemDB:
+    """
+    Build a StarSystemDB from a reduced exoplanets table.
+    Time columns are converted first, then the table returned by StarSystemDB.preprocess_dataset is
+    wrapped.
+    """
+    # Now it's safe to parse Time columns
+    ExoDB.convert_time_columns(reduced_exo_dataset)
+    reduced_exo_dataset = StarSystemDB.preprocess_dataset(reduced_exo_dataset)
+    return StarSystemDB(reduced_exo_dataset)

exotools/datasets/lightcurves.py ADDED Viewed

@@ -0,0 +1,58 @@
+from pathlib import Path
+from typing import Optional
+from exotools.db import TessMetaDB, LightcurveDB
+from exotools.downloaders import LightcurveDownloader
+from exotools.io import BaseStorage
+from exotools.utils.download import DownloadParams
+class LightcurveDataset:
+    """
+    Download lightcurve FITS files and load the ones already on disk.
+    Files live under ``<storage root>/lightcurves/<tic_id>/<obs_id>.fits``.
+    """
+    _DATASET_LIGHTCURVES = "lightcurves"
+    def __init__(self, storage: BaseStorage, override_existing: bool = False, verbose: bool = False):
+        """
+        Parameters:
+            - storage: storage whose ``root_path`` hosts the lightcurves folder
+            - override_existing: forwarded to the LightcurveDownloader
+            - verbose: forwarded to the LightcurveDownloader
+        """
+        self._folder_path = storage.root_path / self._DATASET_LIGHTCURVES
+        self._downloader = LightcurveDownloader(override_existing=override_existing, verbose=verbose)
+    def download_lightcurves_from_tess_db(self, tess_db: TessMetaDB) -> Optional[LightcurveDB]:
+        """
+        Download one FITS file per row of ``tess_db.view``, then load everything found on disk.
+        Returns the same value as load_lightcurve_dataset().
+        """
+        requests = []
+        for row in tess_db.view:
+            # One folder per TIC id, one file per observation id
+            target_file = self._folder_path / str(row["tic_id"]) / f"{row['obs_id']}.fits"
+            requests.append(DownloadParams(url=row["dataURL"], download_path=str(target_file)))
+        print(f"Downloading {len(requests)} lightcurves")
+        fetched = self._downloader.download_fits_parallel(requests)
+        print(f"Downloaded {len(fetched)} lightcurves")
+        return self.load_lightcurve_dataset()
+    def load_lightcurve_dataset(self) -> Optional[LightcurveDB]:
+        """
+        Build a LightcurveDB from the FITS files found in the lightcurves folder.
+        Returns None when no subfolder contains a ``.fits`` file.
+        """
+        paths_by_folder = _get_file_paths_in_subfolder(self._folder_path, file_extension="fits")
+        if not paths_by_folder:
+            return None
+        return LightcurveDB(LightcurveDB.path_map_to_qtable(paths_by_folder))
+def _get_file_paths_in_subfolder(
+    parent_path: Path,
+    file_extension: Optional[str] = None,
+    match_name: Optional[str] = None,
+) -> dict[int, list[Path]]:
+    """
+    Map each direct subfolder of ``parent_path`` to the files inside it that match a pattern.
+    Parameters:
+        - parent_path: folder whose direct subfolders are scanned; it may not exist yet
+        - file_extension: extension (without the dot) to match; used only when match_name is not given
+        - match_name: glob pattern to match; takes precedence over file_extension
+    Returns:
+        A dict keyed by the subfolder name converted to int, holding the sorted matching paths.
+        Subfolders without matches are omitted; a missing ``parent_path`` gives an empty dict.
+    Raises:
+        ValueError: if neither file_extension nor match_name is given, or if a subfolder that contains
+        matching files has a name that is not an integer.
+    """
+    if not file_extension and not match_name:
+        raise ValueError("At least one between file_extension and match_name should be given")
+    # Nothing has been written yet: report "no files" instead of letting iterdir() raise.
+    if not parent_path.is_dir():
+        return {}
+    pattern = match_name if match_name else f"*.{file_extension}"
+    subfolder_dict: dict[int, list[Path]] = {}
+    # Iterate over each subfolder
+    for subfolder in parent_path.iterdir():
+        if not subfolder.is_dir():
+            continue
+        # Sorted so the result does not depend on the filesystem enumeration order.
+        matching_files = sorted(subfolder.glob(pattern))
+        if matching_files:
+            subfolder_dict[int(subfolder.name)] = matching_files
+    return subfolder_dict

exotools/datasets/tess.py ADDED Viewed

@@ -0,0 +1,91 @@
+from typing import Sequence, Optional
+from exotools.db import TicDB, TessMetaDB
+from exotools.downloaders import TessCatalogDownloader, TessObservationsDownloader
+from exotools.io import BaseStorage
+class TessDataset:
+    """
+    Download, store and load TESS observation metadata and TIC catalog tables.
+    Catalog downloads need credentials; observation metadata downloads do not.
+    """
+    _OBSERVATIONS_NAME = "tess_observations"
+    _TIC_NAME = "tess_tic"
+    _TIC_BY_ID_NAME = "tess_tic_by_id"
+    def __init__(self, storage: BaseStorage, username: Optional[str] = None, password: Optional[str] = None):
+        """
+        Args:
+            storage: Backend used to read and write the tables.
+            username: Catalog account name; the catalog downloader is created only when both
+                username and password are given.
+            password: Catalog account password.
+        """
+        self._storage = storage
+        self._catalog_downloader = TessCatalogDownloader(username, password) if username and password else None
+    def _require_catalog_downloader(self) -> TessCatalogDownloader:
+        """Return the catalog downloader, raising ValueError when no credentials were provided."""
+        if self._catalog_downloader is None:
+            raise ValueError("You need to provide a username and password to download the TIC dataset.")
+        return self._catalog_downloader
+    def download_observation_metadata(self, targets_tic_id: Sequence[int], store: bool = True) -> TessMetaDB:
+        """
+        Download the TESS observation list for the given TIC ids.
+        Args:
+            targets_tic_id: TIC ids to look up.
+            store: Whether to persist the downloaded table.
+        Returns:
+            TessMetaDB: Database built from the downloaded table.
+        """
+        print(f"Preparing to download TESS observation list for {len(targets_tic_id)} objects...")
+        meta_qtable, meta_header = TessObservationsDownloader().download_by_id(targets_tic_id)
+        if store:
+            self._storage.write_qtable(meta_qtable, meta_header, self._OBSERVATIONS_NAME, override=True)
+        return TessMetaDB(meta_dataset=meta_qtable)
+    def search_tic_targets(
+        self,
+        limit: Optional[int] = None,
+        star_mass_range: Optional[tuple[float, float]] = None,
+        priority_threshold: Optional[float] = None,
+        store: bool = False,
+    ) -> TicDB:
+        """
+        Download TIC targets, optionally overriding the downloader's search settings.
+        Args:
+            limit: Forwarded to the downloader; None requests no explicit limit.
+            star_mass_range: When given, replaces the downloader's ``star_mass_range``.
+            priority_threshold: When given, replaces the downloader's ``priority_threshold``.
+            store: Whether to persist the downloaded table.
+        Returns:
+            TicDB: Database built from the downloaded table.
+        Raises:
+            ValueError: If no username and password were provided.
+        """
+        downloader = self._require_catalog_downloader()
+        # Overrides stay set on the shared downloader, so they also apply to later calls.
+        if star_mass_range is not None:
+            downloader.star_mass_range = star_mass_range
+        if priority_threshold is not None:
+            downloader.priority_threshold = priority_threshold
+        catalog_qtable, catalog_header = downloader.download(limit=limit)
+        if store:
+            self._storage.write_qtable(catalog_qtable, catalog_header, self._TIC_NAME, override=True)
+        return TicDB(dataset=catalog_qtable)
+    def download_tic_targets_by_ids(self, tic_ids: Sequence[int], store: bool = False) -> TicDB:
+        """
+        Download the TIC entries of the given TIC ids.
+        Args:
+            tic_ids: TIC ids to look up.
+            store: Whether to persist the downloaded table.
+        Returns:
+            TicDB: Database built from the downloaded table.
+        Raises:
+            ValueError: If no username and password were provided.
+        """
+        catalog_qtable, catalog_header = self._require_catalog_downloader().download_by_id(tic_ids)
+        if store:
+            self._storage.write_qtable(catalog_qtable, catalog_header, self._TIC_BY_ID_NAME, override=True)
+        return TicDB(dataset=catalog_qtable)
+    def load_observation_metadata(self) -> Optional[TessMetaDB]:
+        """
+        Load the stored observation metadata.
+        Returns:
+            TessMetaDB: Database built from the stored table, or None (after printing a hint) when
+            reading the table raises ValueError.
+        """
+        try:
+            meta_qtable = self._storage.read_qtable(table_name=self._OBSERVATIONS_NAME)
+        except ValueError:
+            # The hint names the method that actually writes this table.
+            print(
+                "Stored TESS observation metadata not found. You need to download it first by "
+                "calling download_observation_metadata(store=True)."
+            )
+            return None
+        return TessMetaDB(meta_dataset=meta_qtable)
+    def load_tic_target_dataset(self) -> Optional[TicDB]:
+        """
+        Load the TIC table stored by search_tic_targets.
+        Returns:
+            TicDB: Database built from the stored table, or None (after printing a hint) when reading
+            the table raises ValueError.
+        """
+        try:
+            tic_qtable = self._storage.read_qtable(table_name=self._TIC_NAME)
+        except ValueError:
+            print(
+                "Stored TIC dataset not found. You need to download it first by "
+                "calling search_tic_targets(store=True)."
+            )
+            return None
+        return TicDB(dataset=tic_qtable)
+    def load_tic_target_dataset_by_id(self) -> Optional[TicDB]:
+        """
+        Load the TIC table stored by download_tic_targets_by_ids.
+        Returns:
+            TicDB: Database built from the stored table, or None (after printing a hint) when reading
+            the table raises ValueError.
+        """
+        try:
+            tic_qtable = self._storage.read_qtable(table_name=self._TIC_BY_ID_NAME)
+        except ValueError:
+            print(
+                "Stored TIC dataset not found. You need to download it first by "
+                "calling download_tic_targets_by_ids(store=True)."
+            )
+            return None
+        return TicDB(dataset=tic_qtable)

exotools/db/__init__.py ADDED Viewed

@@ -0,0 +1,21 @@
+"""Database classes for exotools."""
+from .lightcurve_plus import LightCurvePlus
+from .toi_db import CandidateDB
+from .exo_db import ExoDB
+from .gaia_db import GaiaDB
+from .starsystem_db import StarSystemDB
+from .lightcurve_db import LightcurveDB
+from .urls_db import TessMetaDB
+from .tic_db import TicDB
+# Names re-exported as the public API of exotools.db.
+__all__ = [
+    "CandidateDB",
+    "ExoDB",
+    "GaiaDB",
+    "StarSystemDB",
+    "LightcurveDB",
+    "LightCurvePlus",
+    "TessMetaDB",
+    "TicDB",
+]

exotools/db/base_db.py ADDED Viewed

@@ -0,0 +1,74 @@
+from abc import ABC, abstractmethod
+import numpy as np
+import pandas as pd
+from astropy.table import QTable
+# Sentinel ID value: BaseDB leaves rows whose ID column equals it out of its ID-based lookups.
+NAN_VALUE = -1
+class BaseDB(ABC):
+    """
+    Thin wrapper around a QTable that is addressed through one ID column.
+    Two views are kept: the full table, and the table restricted to rows whose ID differs from
+    NAN_VALUE. ID-based operations use the restricted table; mask- and position-based ones use the
+    full table.
+    """
+    def __init__(self, dataset: QTable, id_field: str):
+        """
+        Parameters:
+            - dataset: table to wrap; an index on ``id_field`` is added to it
+            - id_field: name of the ID column
+        """
+        dataset.add_index(id_field)
+        self._ds = dataset
+        self._id_column = id_field
+        # Rows carrying a usable ID
+        self._id_mask = self._ds[self._id_column] != NAN_VALUE
+        self._masked_ds = self._ds[self._id_mask]
+    def __len__(self):
+        """Number of rows of the full table."""
+        return len(self.view)
+    @abstractmethod
+    def _factory(self, dataset: QTable) -> "BaseDB":
+        """Wrap ``dataset`` in a new database; subclasses decide the concrete type."""
+        pass
+    @property
+    def view(self) -> QTable:
+        """The wrapped table itself (not a copy)."""
+        return self._ds
+    @property
+    def dataset_copy(self) -> QTable:
+        """A copy of the wrapped table."""
+        return self._ds.copy()
+    @property
+    def ids(self) -> np.ndarray:
+        """ID values of the rows whose ID differs from NAN_VALUE."""
+        id_column = self._masked_ds[self._id_column]
+        return id_column.value
+    @property
+    def unique_ids(self) -> np.ndarray:
+        """The values of ``ids`` without duplicates, as returned by np.unique."""
+        return np.unique(self.ids)
+    def match_ids(self, other_ids: np.ndarray) -> np.ndarray:
+        """
+        Boolean array, aligned with ``ids``, that is True where the ID appears in ``other_ids``.
+        """
+        return np.isin(self.ids, other_ids)
+    def match_field(self, field_name: str, other_values: np.ndarray) -> np.ndarray:
+        """
+        Boolean array, aligned with the full table, that is True where the value of column
+        ``field_name`` appears in ``other_values``.
+        """
+        column = self.view[field_name]
+        return np.isin(column, other_values)
+    def select_by_id(self, other_ids: np.ndarray) -> "BaseDB":
+        """
+        New database holding the rows (among those with a usable ID) whose ID is in ``other_ids``.
+        """
+        keep = self.match_ids(other_ids)
+        return self._factory(self._masked_ds[keep])
+    def select_by_mask(self, bit_mask: np.ndarray) -> "BaseDB":
+        """
+        New database holding the rows of the full table selected by ``bit_mask``; the mask must
+        therefore line up with ``view``, not with ``ids``.
+        """
+        selected = self._ds[bit_mask]
+        return self._factory(selected)
+    def select_random_sample(self, n: int, unique_ids: bool = True) -> "BaseDB":
+        """
+        New database holding a random sample drawn without replacement.
+        Parameters:
+            - n: how many IDs (unique_ids=True) or rows (unique_ids=False) to draw
+            - unique_ids: draw among the unique IDs and keep every row of each drawn ID, instead of
+              drawing row positions of the full table
+        """
+        if not unique_ids:
+            row_positions = np.random.choice(len(self.view), size=n, replace=False)
+            return self._factory(self.view[row_positions])
+        sampled_ids = np.random.choice(self.unique_ids, size=n, replace=False)
+        return self.select_by_id(sampled_ids)
+    def to_pandas(self) -> pd.DataFrame:
+        """The full table as a pandas DataFrame, after ``reset_index()``."""
+        frame = self.view.to_pandas()
+        return frame.reset_index()