graflo 1.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of graflo might be problematic; see the registry's advisory page for more details.

Files changed (70)
  1. graflo/README.md +18 -0
  2. graflo/__init__.py +70 -0
  3. graflo/architecture/__init__.py +38 -0
  4. graflo/architecture/actor.py +1276 -0
  5. graflo/architecture/actor_util.py +450 -0
  6. graflo/architecture/edge.py +418 -0
  7. graflo/architecture/onto.py +376 -0
  8. graflo/architecture/onto_sql.py +54 -0
  9. graflo/architecture/resource.py +163 -0
  10. graflo/architecture/schema.py +135 -0
  11. graflo/architecture/transform.py +292 -0
  12. graflo/architecture/util.py +89 -0
  13. graflo/architecture/vertex.py +562 -0
  14. graflo/caster.py +736 -0
  15. graflo/cli/__init__.py +14 -0
  16. graflo/cli/ingest.py +203 -0
  17. graflo/cli/manage_dbs.py +197 -0
  18. graflo/cli/plot_schema.py +132 -0
  19. graflo/cli/xml2json.py +93 -0
  20. graflo/data_source/__init__.py +48 -0
  21. graflo/data_source/api.py +339 -0
  22. graflo/data_source/base.py +95 -0
  23. graflo/data_source/factory.py +304 -0
  24. graflo/data_source/file.py +148 -0
  25. graflo/data_source/memory.py +70 -0
  26. graflo/data_source/registry.py +82 -0
  27. graflo/data_source/sql.py +183 -0
  28. graflo/db/__init__.py +44 -0
  29. graflo/db/arango/__init__.py +22 -0
  30. graflo/db/arango/conn.py +1025 -0
  31. graflo/db/arango/query.py +180 -0
  32. graflo/db/arango/util.py +88 -0
  33. graflo/db/conn.py +377 -0
  34. graflo/db/connection/__init__.py +6 -0
  35. graflo/db/connection/config_mapping.py +18 -0
  36. graflo/db/connection/onto.py +717 -0
  37. graflo/db/connection/wsgi.py +29 -0
  38. graflo/db/manager.py +119 -0
  39. graflo/db/neo4j/__init__.py +16 -0
  40. graflo/db/neo4j/conn.py +639 -0
  41. graflo/db/postgres/__init__.py +37 -0
  42. graflo/db/postgres/conn.py +948 -0
  43. graflo/db/postgres/fuzzy_matcher.py +281 -0
  44. graflo/db/postgres/heuristics.py +133 -0
  45. graflo/db/postgres/inference_utils.py +428 -0
  46. graflo/db/postgres/resource_mapping.py +273 -0
  47. graflo/db/postgres/schema_inference.py +372 -0
  48. graflo/db/postgres/types.py +148 -0
  49. graflo/db/postgres/util.py +87 -0
  50. graflo/db/tigergraph/__init__.py +9 -0
  51. graflo/db/tigergraph/conn.py +2365 -0
  52. graflo/db/tigergraph/onto.py +26 -0
  53. graflo/db/util.py +49 -0
  54. graflo/filter/__init__.py +21 -0
  55. graflo/filter/onto.py +525 -0
  56. graflo/logging.conf +22 -0
  57. graflo/onto.py +312 -0
  58. graflo/plot/__init__.py +17 -0
  59. graflo/plot/plotter.py +616 -0
  60. graflo/util/__init__.py +23 -0
  61. graflo/util/chunker.py +807 -0
  62. graflo/util/merge.py +150 -0
  63. graflo/util/misc.py +37 -0
  64. graflo/util/onto.py +422 -0
  65. graflo/util/transform.py +454 -0
  66. graflo-1.3.7.dist-info/METADATA +243 -0
  67. graflo-1.3.7.dist-info/RECORD +70 -0
  68. graflo-1.3.7.dist-info/WHEEL +4 -0
  69. graflo-1.3.7.dist-info/entry_points.txt +5 -0
  70. graflo-1.3.7.dist-info/licenses/LICENSE +126 -0
graflo/cli/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ """Command-line interface for graflo.
2
+
3
+ This module provides command-line tools for working with graflo, including
4
+ utilities for data ingestion, schema management, and graph operations.
5
+
6
+ Key Components:
7
+ - Command-line tools for data processing
8
+ - Schema management utilities
9
+ - Graph database operations
10
+
11
+ Example:
12
+ >>> uv run ingest --config config.json --data data.json
13
+ >>> uv run plot_schema --config schema.yaml --output figs
14
+ """
graflo/cli/ingest.py ADDED
@@ -0,0 +1,203 @@
1
+ """Data ingestion command-line interface for graph databases.
2
+
3
+ This module provides a CLI tool for ingesting data into graph databases. It supports
4
+ batch processing, parallel execution, and various data formats. The tool can handle
5
+ both initial database setup and incremental data ingestion.
6
+
7
+ Key Features:
8
+ - Configurable batch processing
9
+ - Multi-core and multi-threaded execution
10
+ - Support for custom resource patterns
11
+ - Database initialization and cleanup options
12
+ - Flexible file discovery and processing
13
+
14
+ Example:
15
+ $ uv run ingest \\
16
+ --db-config-path config/db.yaml \\
17
+ --schema-path config/schema.yaml \\
18
+ --source-path data/ \\
19
+ --batch-size 5000 \\
20
+ --n-cores 4
21
+ """
22
+
23
+ import logging.config
24
+ import pathlib
25
+ from os.path import dirname, join, realpath
26
+
27
+ import click
28
+ from suthing import FileHandle
29
+
30
+ from graflo import Caster, DataSourceRegistry, Patterns, Schema
31
+ from graflo.db.connection.onto import DBConfig
32
+ from graflo.data_source import DataSourceFactory
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
@click.command()
@click.option(
    "--db-config-path",
    type=click.Path(exists=True, path_type=pathlib.Path),
    required=True,
)
@click.option(
    "--schema-path",
    type=click.Path(exists=True, path_type=pathlib.Path),
    required=True,
)
@click.option(
    "--source-path",
    type=click.Path(exists=True, path_type=pathlib.Path),
    required=False,
    help="Path to source data directory (required if not using --data-source-config-path)",
)
@click.option(
    "--resource-pattern-config-path",
    type=click.Path(exists=True, path_type=pathlib.Path),
    default=None,
)
@click.option(
    "--data-source-config-path",
    type=click.Path(exists=True, path_type=pathlib.Path),
    default=None,
    help="Path to data source configuration file (supports API, SQL, file sources)",
)
@click.option("--limit-files", type=int, default=None)
@click.option("--batch-size", type=int, default=5000)
@click.option("--n-cores", type=int, default=1)
@click.option(
    "--n-threads",
    type=int,
    default=1,
)
# default=False so the value is a real bool rather than None when omitted
@click.option("--fresh-start", type=bool, default=False, help="wipe existing database")
@click.option(
    "--init-only",
    default=False,
    is_flag=True,
    help="skip ingestion; only init the db",
)
def ingest(
    db_config_path,
    schema_path,
    source_path,
    limit_files,
    batch_size,
    n_cores,
    n_threads,
    fresh_start,
    init_only,
    resource_pattern_config_path,
    data_source_config_path,
):
    """Ingest data into a graph database.

    This command processes data files and ingests them into a graph database according
    to the provided schema. It supports various configuration options for controlling
    the ingestion process.

    Args:
        db_config_path: Path to database configuration file
        schema_path: Path to schema configuration file
        source_path: Path to source data directory
        limit_files: Optional limit on number of files to process
        batch_size: Number of items to process in each batch (default: 5000)
        n_cores: Number of CPU cores to use for parallel processing (default: 1)
        n_threads: Number of threads per core (default: 1). Accepted so click can
            pass the declared ``--n-threads`` option; previously the option was
            declared but missing from this signature, which made every invocation
            fail with ``TypeError: unexpected keyword argument 'n_threads'``.
        fresh_start: Whether to wipe existing database before ingestion
        init_only: Whether to only initialize the database without ingestion
        resource_pattern_config_path: Optional path to resource pattern configuration
        data_source_config_path: Optional path to a data source configuration
            (API, SQL, file sources); mutually complementary with source_path

    Example:
        $ uv run ingest \\
            --db-config-path config/db.yaml \\
            --schema-path config/schema.yaml \\
            --source-path data/ \\
            --batch-size 5000 \\
            --n-cores 4 \\
            --fresh-start
    """
    cdir = dirname(realpath(__file__))

    # File-based logging config lives one package level up from this module.
    logging.config.fileConfig(
        join(cdir, "../logging.conf"), disable_existing_loggers=False
    )

    logging.basicConfig(level=logging.INFO)

    schema = Schema.from_dict(FileHandle.load(schema_path))

    # Load database connection config from file
    config_data = FileHandle.load(db_config_path)
    conn_conf = DBConfig.from_dict(config_data)

    if resource_pattern_config_path is not None:
        patterns = Patterns.from_dict(FileHandle.load(resource_pattern_config_path))
    else:
        patterns = Patterns()

    schema.fetch_resource()

    # Create ingestion params with CLI arguments (local import mirrors the
    # original module layout and avoids a potential import cycle at load time)
    from graflo.caster import IngestionParams

    # NOTE(review): n_threads is accepted for CLI compatibility; wire it into
    # IngestionParams once the parameter is confirmed to be supported there.
    ingestion_params = IngestionParams(
        n_cores=n_cores,
    )

    caster = Caster(
        schema,
        ingestion_params=ingestion_params,
    )

    # Validate that either source_path or data_source_config_path is provided
    if data_source_config_path is None and source_path is None:
        raise click.UsageError(
            "Either --source-path or --data-source-config-path must be provided"
        )

    # Check if data source config is provided (for API, SQL, etc.)
    if data_source_config_path is not None:
        # Load data source configuration
        data_source_config = FileHandle.load(data_source_config_path)
        registry = DataSourceRegistry()

        # Register data sources from config
        # Config format: {"data_sources": [{"source_type": "...", "resource_name": "...", ...}]}
        if "data_sources" in data_source_config:
            for ds_config in data_source_config["data_sources"]:
                # Copy so the loaded config dict is not mutated by the pops below
                ds_config_copy = ds_config.copy()
                resource_name = ds_config_copy.pop("resource_name")
                source_type = ds_config_copy.pop("source_type", None)

                # Create data source using factory
                data_source = DataSourceFactory.create_data_source(
                    source_type=source_type, **ds_config_copy
                )
                registry.register(data_source, resource_name=resource_name)

        # Update ingestion params with runtime options
        ingestion_params.clean_start = fresh_start
        ingestion_params.batch_size = batch_size
        ingestion_params.init_only = init_only

        caster.ingest_data_sources(
            data_source_registry=registry,
            conn_conf=conn_conf,
            ingestion_params=ingestion_params,
        )
    else:
        # Fall back to file-based ingestion
        # Update ingestion params with runtime options
        ingestion_params.clean_start = fresh_start
        ingestion_params.batch_size = batch_size
        ingestion_params.init_only = init_only
        ingestion_params.limit_files = limit_files

        caster.ingest(
            output_config=conn_conf,
            patterns=patterns,
            ingestion_params=ingestion_params,
        )


if __name__ == "__main__":
    ingest()
@@ -0,0 +1,197 @@
1
+ """Database management utilities for ArangoDB.
2
+
3
+ This module provides command-line tools for managing ArangoDB databases, including
4
+ backup and restore operations. It supports both local and Docker-based operations.
5
+
6
+ Key Features:
7
+ - Database backup and restore
8
+ - Docker and local execution modes
9
+ - Configurable connection settings
10
+ - Batch processing of multiple databases
11
+
12
+ Example:
13
+ $ uv run manage_dbs \\
14
+ --db-config-path config/db.yaml \\
15
+ --db mydb1 mydb2 \\
16
+ --store-directory-path /backups \\
17
+ --use-docker
18
+ """
19
+
20
+ import logging
21
+ import pathlib
22
+ import subprocess
23
+ import sys
24
+ from datetime import date
25
+
26
+ import click
27
+ from suthing import FileHandle, Timer
28
+
29
+ from graflo.db.connection.onto import ArangoConfig, DBConfig
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ def act_db(
35
+ conf: ArangoConfig,
36
+ db_name: str,
37
+ output_path: pathlib.Path,
38
+ restore: bool,
39
+ docker_version: str,
40
+ use_docker: bool,
41
+ ):
42
+ """Execute database backup or restore operation.
43
+
44
+ This function performs either a backup (arangodump) or restore (arangorestore)
45
+ operation on an ArangoDB database. It can use either the local arangodump/arangorestore
46
+ tools or run them in a Docker container.
47
+
48
+ Args:
49
+ conf: Database connection configuration
50
+ db_name: Name of the database to backup/restore
51
+ output_path: Path where backup will be stored or restored from
52
+ restore: Whether to restore (True) or backup (False)
53
+ docker_version: Version of ArangoDB Docker image to use
54
+ use_docker: Whether to use Docker for the operation
55
+
56
+ Returns:
57
+ None
58
+
59
+ Raises:
60
+ subprocess.CalledProcessError: If the backup/restore operation fails
61
+ """
62
+ host = f"tcp://{conf.hostname}:{conf.port}"
63
+ db_folder = output_path / db_name
64
+
65
+ cmd = "arangorestore" if restore else "arangodump"
66
+ if use_docker:
67
+ ru = (
68
+ f"docker run --rm --network=host -v {db_folder}:/dump"
69
+ f" arangodb/arangodb:{docker_version} {cmd}"
70
+ )
71
+ output = "--output-directory /dump"
72
+ else:
73
+ ru = f"{cmd}"
74
+ output = f"--output-directory {db_folder}"
75
+
76
+ dir_spec = "input" if restore else "output"
77
+
78
+ query = f"""{ru} --server.endpoint {host} --server.username {conf.username} --server.password "{conf.password}" --{dir_spec}-directory {output} --server.database "{db_name}" """
79
+
80
+ restore_suffix = "--create-database true --force-same-database true"
81
+ if restore:
82
+ query += restore_suffix
83
+ else:
84
+ query += "--overwrite true"
85
+
86
+ flag = subprocess.run(query, shell=True)
87
+ logger.info(f"returned {flag}")
88
+
89
+
90
@click.command()
@click.option(
    "--db-config-path",
    type=click.Path(exists=True, path_type=pathlib.Path),
    required=False,
    default=None,
)
@click.option("--db-host", type=str)
@click.option("--db-password", type=str)
@click.option("--db-user", type=str, default="root")
@click.option(
    "--db",
    type=str,
    multiple=True,
    required=True,
    # Fixed: this help text was copy-pasted from --store-directory-path
    help="name of a database to back up or restore (repeatable)",
)
@click.option(
    "--store-directory-path",
    type=click.Path(path_type=pathlib.Path),
    required=True,
    help="filesystem path where to dump db snapshot",
)
@click.option("--docker-version", type=str, default="3.12.1")
@click.option("--restore", type=bool, default=False, is_flag=True)
@click.option("--use-docker", type=bool, default=True)
def manage_dbs(
    db_config_path,
    db_host,
    db_password,
    db_user,
    db,
    store_directory_path,
    restore,
    docker_version,
    use_docker=True,
):
    """Manage ArangoDB database backups and restores.

    This command provides functionality to backup and restore ArangoDB databases.
    It supports both local execution and Docker-based operations. The command can
    process multiple databases in sequence and provides timing information for
    each operation.

    Args:
        db_config_path: Path to database configuration file (optional)
        db_host: Database host address (if not using config file)
        db_password: Database password (if not using config file)
        db_user: Database username (default: root)
        db: List of database names to process
        store_directory_path: Path where backups will be stored/restored
        restore: Whether to restore (True) or backup (False)
        docker_version: Version of ArangoDB Docker image (default: 3.12.1)
        use_docker: Whether to use Docker for operations (default: True)

    Example:
        $ uv run manage_dbs \\
            --db-config-path config/db.yaml \\
            --db mydb1 --db mydb2 \\
            --store-directory-path /backups \\
            --use-docker
    """
    if db_config_path is None:
        # Construct URI from host, accepting both bare hosts and full URIs
        uri = db_host if db_host and "://" in db_host else f"http://{db_host}"
        db_conf = ArangoConfig(uri=uri, username=db_user, password=db_password)
    else:
        conn_conf = FileHandle.load(fpath=db_config_path)
        db_conf_raw = DBConfig.from_dict(conn_conf)
        # Type checker can't infer the specific type, but we know it's ArangoConfig from the config
        if not isinstance(db_conf_raw, ArangoConfig):
            raise ValueError(f"Expected ArangoConfig, got {type(db_conf_raw)}")
        db_conf: ArangoConfig = db_conf_raw

    action = "restoring" if restore else "dumping"
    if restore:
        out_path = store_directory_path
    else:
        # Dumps go into a date-stamped subfolder of the store directory
        out_path = (
            store_directory_path.expanduser().resolve() / date.today().isoformat()
        )

    # parents=True so a missing store directory does not abort the run
    out_path.mkdir(parents=True, exist_ok=True)

    with Timer() as t_all:
        for dbname in db:
            with Timer() as t_dump:
                try:
                    act_db(
                        db_conf,
                        dbname,
                        out_path,
                        restore=restore,
                        docker_version=docker_version,
                        use_docker=use_docker,
                    )
                except Exception as e:
                    # Log with traceback and continue with the remaining databases
                    logger.exception(e)
            logger.info(
                f"{action} {dbname} took {t_dump.mins} mins {t_dump.secs:.2f} sec"
            )
    logger.info(f"all {action} took {t_all.mins} mins {t_all.secs:.2f} sec")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
    manage_dbs()
@@ -0,0 +1,132 @@
1
+ """Schema visualization tool for graph databases.
2
+
3
+ This module provides functionality for visualizing graph database schemas using Graphviz.
4
+ It includes tools for plotting vertex-to-vertex relationships, vertex fields, and resource
5
+ mappings. The module supports various visualization options and graph layout customization.
6
+
7
+ Key Components:
8
+ - SchemaPlotter: Main class for schema visualization
9
+ - knapsack: Utility for optimizing graph layout
10
+ - plot_schema: CLI command for schema visualization
11
+
12
+ Graphviz Attributes Reference:
13
+ - https://renenyffenegger.ch/notes/tools/Graphviz/attributes/index
14
+ - https://rsms.me/graphviz/
15
+ - https://graphviz.readthedocs.io/en/stable/examples.html
16
+ - https://graphviz.org/doc/info/attrs.html
17
+
18
+ Example:
19
+ >>> plot_schema(schema_path="schema.yaml", figure_output_path="schema.png")
20
+ """
21
+
22
+ import logging
23
+ import sys
24
+
25
+ import click
26
+
27
+ from graflo.plot.plotter import SchemaPlotter
28
+
29
+ """
30
+
31
+ graphviz attributes
32
+
33
+ https://renenyffenegger.ch/notes/tools/Graphviz/attributes/index
34
+ https://rsms.me/graphviz/
35
+ https://graphviz.readthedocs.io/en/stable/examples.html
36
+ https://graphviz.org/doc/info/attrs.html
37
+
38
+ usage:
39
+ color='red',style='filled', fillcolor='blue',shape='square'
40
+
41
+ to keep
42
+ level_one = [node1, node2]
43
+ sg_one = ag.add_subgraph(level_one, rank='same')
44
+
45
+ """
46
+
47
+
48
def knapsack(weights, ks_size=7):
    """Split items into groups whose total weight is at most ``ks_size``.

    Greedy first-fit-decreasing partition: repeatedly open a new group with the
    heaviest remaining item, then add the heaviest remaining items that still
    fit. Used for optimizing graph layout by balancing node distribution.

    The previous implementation popped ``pp[len(pp) - j - 1]`` while ``pp``
    shrank during iteration, so from the second pop of a pass onward it removed
    the wrong element and could raise IndexError (e.g. ``knapsack([5, 1, 1, 1],
    8)`` crashed); it also crashed on empty input and printed debug output.

    Args:
        weights: Sequence of item weights.
        ks_size: Maximum total weight per group (default: 7).

    Returns:
        list[list[int]]: List of groups, where each group is a list of indices
            into ``weights``.

    Raises:
        ValueError: If any single weight exceeds ks_size.

    Example:
        >>> knapsack([3, 4, 2, 5, 1], ks_size=7)
        [[3, 2], [1, 0], [4]]
    """
    if not weights:
        return []
    if max(weights) > ks_size:
        raise ValueError("One of the items is larger than the knapsack")

    # Indices sorted by weight ascending: the heaviest item sits at the end.
    order = sorted(range(len(weights)), key=lambda i: weights[i])
    groups: list[list[int]] = []

    while order:
        # Seed a new group with the heaviest remaining item.
        group = [order.pop()]
        total = weights[group[0]]

        # Scan the rest from heaviest to lightest, taking anything that fits.
        # Iterating by decreasing index keeps earlier positions stable when
        # popping, which is what the original index arithmetic got wrong.
        i = len(order) - 1
        while i >= 0 and total < ks_size:
            w = weights[order[i]]
            if total + w <= ks_size:
                group.append(order.pop(i))
                total += w
            i -= 1

        groups.append(group)

    return groups
96
+
97
+
98
@click.command()
@click.option("-c", "--schema-path", type=click.Path(), required=True)
@click.option("-o", "--figure-output-path", type=click.Path(), required=True)
@click.option("-p", "--prune-low-degree-nodes", type=bool, default=False)
def plot_schema(schema_path, figure_output_path, prune_low_degree_nodes):
    """Generate visualizations of the graph database schema.

    This command creates multiple visualizations of the schema:
    1. Vertex-to-vertex relationships
    2. Vertex fields and their relationships
    3. Resource mappings

    The visualizations are saved to the specified output path.

    Args:
        schema_path: Path to the schema configuration file
        figure_output_path: Path where the visualization will be saved
        prune_low_degree_nodes: Whether to remove nodes with low connectivity
            from the visualization (default: False)

    Example:
        $ uv run plot_schema -c schema.yaml -o schema.png
    """
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)

    # SchemaPlotter reads the schema file and writes figures to the output path.
    plotter = SchemaPlotter(schema_path, figure_output_path)
    # Vertex-to-vertex relationship graph; optionally drop low-degree leaves.
    plotter.plot_vc2vc(prune_leaves=prune_low_degree_nodes)
    # Vertex fields and their relationships.
    plotter.plot_vc2fields()
    # Resource mapping diagrams.
    plotter.plot_resources()
    # plotter.plot_source2vc()
    # plotter.plot_source2vc_detailed()


if __name__ == "__main__":
    plot_schema()
graflo/cli/xml2json.py ADDED
@@ -0,0 +1,93 @@
1
+ """XML to JSON conversion tool for data preprocessing.
2
+
3
+ This module provides a command-line tool for converting XML files to JSON format,
4
+ with support for different data sources and chunking options. It's particularly
5
+ useful for preprocessing scientific literature data from sources like Web of Science
6
+ and PubMed.
7
+
8
+ Key Features:
9
+ - Support for Web of Science and PubMed XML formats
10
+ - Configurable chunking for large files
11
+ - Batch processing of multiple files
12
+ - Customizable output format
13
+
14
+ Example:
15
+ $ uv run xml2json \\
16
+ --source-path data/wos.xml \\
17
+ --chunk-size 1000 \\
18
+ --mode wos_csv
19
+ """
20
+
21
+ import logging
22
+ import pathlib
23
+ import sys
24
+
25
+ import click
26
+
27
+ from graflo.util.chunker import convert, force_list_wos, tag_wos
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
@click.command()
@click.option(
    "-s",
    "--source-path",
    type=click.Path(path_type=pathlib.Path),
    required=True,
)
@click.option("-c", "--chunk-size", type=int, default=1000)
@click.option("-m", "--max-chunks", type=int, default=None)
@click.option("--mode", type=str)
def do(source_path, chunk_size, max_chunks, mode):
    """Convert XML files to JSON format.

    This command processes XML files and converts them to JSON format, with support
    for different data sources and chunking options.

    Args:
        source_path: Path to source XML file or directory
        chunk_size: Number of records per output file (default: 1000)
        max_chunks: Maximum number of chunks to process (default: None)
        mode: Data source mode ('wos_csv' or 'pubmed')

    Raises:
        ValueError: If ``mode`` is not 'wos_csv' or 'pubmed'.

    Example:
        $ uv run xml2json \\
            --source-path data/wos.xml \\
            --chunk-size 1000 \\
            --mode wos_csv
    """
    if mode == "wos_csv":
        # Regex used to strip the default XML namespace declaration from WoS records
        pattern = r"xmlns=\".*[^\"]\"(?=>)"
        force_list = force_list_wos
        tag = tag_wos
    elif mode == "pubmed":
        pattern = None
        force_list = None
        tag = "PubmedArticle"
    else:
        raise ValueError(f"Unknown mode {mode}")

    if source_path.is_dir():
        files = [
            fp for fp in source_path.iterdir() if not fp.is_dir() and "xml" in fp.name
        ]
    else:
        # Match the directory branch's filter. The previous check required
        # ".xml." in the name, which silently skipped plain "*.xml" files —
        # including the module's own documented example (data/wos.xml).
        files = [source_path] if "xml" in source_path.name else []

    if not files:
        logger.warning("no xml files found under %s", source_path)

    for fp in files:
        # Output files share the input's stem, written next to the input file
        target_root = str(fp.parent / fp.name.split(".")[0])

        convert(
            fp,
            target_root=target_root,
            chunk_size=chunk_size,
            max_chunks=max_chunks,
            pattern=pattern,
            force_list=force_list,
            root_tag=tag,
        )


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
    do()
@@ -0,0 +1,48 @@
1
+ """Data source abstraction layer for graph database ingestion.
2
+
3
+ This package provides a unified interface for different data source types,
4
+ separating "where data comes from" (DataSource) from "how it's transformed" (Resource).
5
+
6
+ Key Components:
7
+ - AbstractDataSource: Base class for all data sources
8
+ - FileDataSource: File-based data sources (JSON, JSONL, CSV/TSV)
9
+ - APIDataSource: REST API data source
10
+ - SQLDataSource: SQL database data source
11
+ - DataSourceRegistry: Maps DataSources to Resource names
12
+
13
+ Example:
14
+ >>> from graflo.data_source import FileDataSource, DataSourceRegistry
15
+ >>> source = FileDataSource(path="data.json", file_type="json")
16
+ >>> registry = DataSourceRegistry()
17
+ >>> registry.register(source, resource_name="users")
18
+ """
19
+
20
+ from .api import APIConfig, APIDataSource, PaginationConfig
21
+ from .base import AbstractDataSource, DataSourceType
22
+ from .factory import DataSourceFactory
23
+ from .file import (
24
+ FileDataSource,
25
+ JsonFileDataSource,
26
+ JsonlFileDataSource,
27
+ TableFileDataSource,
28
+ )
29
+ from .memory import InMemoryDataSource
30
+ from .registry import DataSourceRegistry
31
+ from .sql import SQLConfig, SQLDataSource
32
+
33
+ __all__ = [
34
+ "AbstractDataSource",
35
+ "APIConfig",
36
+ "APIDataSource",
37
+ "DataSourceFactory",
38
+ "DataSourceRegistry",
39
+ "DataSourceType",
40
+ "FileDataSource",
41
+ "InMemoryDataSource",
42
+ "JsonFileDataSource",
43
+ "JsonlFileDataSource",
44
+ "PaginationConfig",
45
+ "SQLConfig",
46
+ "SQLDataSource",
47
+ "TableFileDataSource",
48
+ ]