linkml-store 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkml_store/api/client.py +63 -7
- linkml_store/api/collection.py +138 -30
- linkml_store/api/config.py +48 -6
- linkml_store/api/database.py +45 -27
- linkml_store/api/stores/duckdb/duckdb_collection.py +16 -0
- linkml_store/api/stores/duckdb/duckdb_database.py +16 -2
- linkml_store/api/stores/filesystem/filesystem_collection.py +11 -4
- linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
- linkml_store/api/stores/mongodb/mongodb_collection.py +6 -2
- linkml_store/api/stores/mongodb/mongodb_database.py +1 -36
- linkml_store/api/stores/solr/solr_collection.py +4 -4
- linkml_store/cli.py +35 -17
- linkml_store/index/__init__.py +16 -2
- linkml_store/index/implementations/llm_indexer.py +2 -1
- linkml_store/index/indexer.py +13 -2
- linkml_store/utils/file_utils.py +37 -0
- linkml_store/utils/format_utils.py +68 -7
- linkml_store/utils/pandas_utils.py +40 -0
- linkml_store/utils/sql_utils.py +2 -1
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/METADATA +36 -3
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/RECORD +24 -22
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.10.dist-info → linkml_store-0.1.11.dist-info}/entry_points.txt +0 -0

linkml_store/api/stores/duckdb/duckdb_database.py
CHANGED

@@ -26,6 +26,8 @@ TYPE_MAP = {
     "JSON": "Any",
 }

+MEMORY_HANDLE = "duckdb:///:memory:"
+

 logger = logging.getLogger(__name__)

@@ -49,7 +51,7 @@ class DuckDBDatabase(Database):

     def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
         if handle is None:
-            handle = "duckdb:///:memory:"
+            handle = MEMORY_HANDLE
         if recreate_if_exists:
             path = Path(handle.replace("duckdb:///", ""))
             if path.exists():
@@ -76,6 +78,17 @@ class DuckDBDatabase(Database):
     def close(self, **kwargs):
         self.engine.dispose()

+    def drop(self, missing_ok=True, **kwargs):
+        self.close()
+        if self.handle == MEMORY_HANDLE:
+            return
+        path = Path(self.handle.replace("duckdb:///", ""))
+        if path.exists():
+            path.unlink()
+        else:
+            if not missing_ok:
+                raise FileNotFoundError(f"Database file not found: {path}")
+
     def query(self, query: Query, **kwargs) -> QueryResult:
         json_encoded_cols = []
         if query.from_table:
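
The new drop method closes the engine, treats the in-memory handle as a no-op, and otherwise unlinks the database file. A minimal usage sketch, assuming the import path shown in this diff and an illustrative /tmp path:

    from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase

    mem_db = DuckDBDatabase()  # handle defaults to MEMORY_HANDLE, so drop() never touches disk
    mem_db.drop()

    file_db = DuckDBDatabase(handle="duckdb:////tmp/example.db")  # illustrative path
    file_db.drop(missing_ok=True)   # removes /tmp/example.db if present, silent otherwise
    # file_db.drop(missing_ok=False) would raise FileNotFoundError if the file is absent
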
@@ -94,7 +107,8 @@ class DuckDBDatabase(Database):
         if sv:
             cd = None
             for c in self._collections.values():
-                if c.name == query.from_table or c.metadata.alias == query.from_table:
+                # if c.name == query.from_table or c.metadata.alias == query.from_table:
+                if c.alias == query.from_table or c.target_class_name == query.from_table:
                     cd = c.class_definition()
                     break
             if cd:

linkml_store/api/stores/filesystem/filesystem_collection.py
CHANGED

@@ -31,7 +31,7 @@ class FileSystemCollection(Collection[DatabaseType]):

     @property
     def path_to_file(self):
-        return Path(self.parent.directory_path) / f"{self.name}.{self.file_format}"
+        return Path(self.parent.directory_path) / f"{self.alias}.{self.file_format}"

     @property
     def objects_as_list(self) -> List[OBJECT]:
@@ -150,13 +150,20 @@ class FileSystemCollection(Collection[DatabaseType]):
         curr_objects = [o for o in self.objects_as_list if not matches(o)]
         self._set_objects(curr_objects)

-    def query(self, query: Query, **kwargs) -> QueryResult:
-
+    def query(self, query: Query, limit: Optional[int] = None, offset: Optional[int] = None, **kwargs) -> QueryResult:
+        limit = limit or query.limit
+        offset = offset or query.offset
+        if offset is None:
+            offset = 0
         where = query.where_clause or {}
         match = mongo_query_to_match_function(where)
         rows = [o for o in self.objects_as_list if match(o)]
         count = len(rows)
-        return QueryResult(query=query, num_rows=count, rows=rows)
+        if limit is None or limit < 0:
+            limit = count
+        # TODO: avoid recalculating
+        returned_row = rows[offset : offset + limit]
+        return QueryResult(query=query, num_rows=count, rows=returned_row)

     def query_facets(
         self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
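
Note that num_rows reports the total match count while rows holds only the requested page; a missing or negative limit returns everything. The slicing arithmetic in isolation, as a self-contained sketch:

    def paginate(rows, limit=None, offset=None):
        # Mirrors the slicing added to FileSystemCollection.query
        offset = offset or 0
        count = len(rows)
        if limit is None or limit < 0:
            limit = count
        return count, rows[offset : offset + limit]

    total, page = paginate(list(range(10)), limit=3, offset=4)
    assert (total, page) == (10, [4, 5, 6])
    total, page = paginate(list(range(10)), limit=-1)
    assert page == list(range(10))  # negative limit means "no limit"
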

linkml_store/api/stores/filesystem/filesystem_database.py
CHANGED

@@ -9,6 +9,7 @@ from linkml_runtime import SchemaView
 from linkml_store.api import Database
 from linkml_store.api.config import DatabaseConfig
 from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
+from linkml_store.utils.file_utils import safe_remove_directory
 from linkml_store.utils.format_utils import Format, load_objects

 logger = logging.getLogger(__name__)
@@ -20,6 +21,8 @@ class FileSystemDatabase(Database):
     directory_path: Optional[Path] = None
     default_file_format: Optional[str] = None

+    no_backup_on_drop: bool = False
+
     def __init__(self, handle: Optional[str] = None, **kwargs):
         handle = handle.replace("file:", "")
         if handle.startswith("//"):
@@ -43,6 +46,12 @@ class FileSystemDatabase(Database):
     def close(self, **kwargs):
         pass

+    def drop(self, no_backup=False, **kwargs):
+        self.close()
+        path = self.directory_path
+        if path.exists():
+            safe_remove_directory(path, no_backup=self.no_backup_on_drop or no_backup)
+
     def init_collections(self):
         metadata = self.metadata
         if self._collections is None:
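
drop delegates to safe_remove_directory (the new utility added later in this diff), so by default the store directory is preserved as a timestamped backup; no_backup=True, or the new no_backup_on_drop field, skips that. A hedged sketch with an illustrative handle:

    from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase

    db = FileSystemDatabase(handle="file:/tmp/my_store")  # illustrative path
    db.drop()                  # directory moved aside to /tmp/my_store_backup_<timestamp>
    # db.drop(no_backup=True)  # directory removed via a TemporaryDirectory, no backup kept
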
@@ -63,7 +72,7 @@ class FileSystemDatabase(Database):
             self._collections[n] = collection
             collection._set_objects(objs)

-    def induce_schema_view(self) -> SchemaView:
+    def xxxinduce_schema_view(self) -> SchemaView:
         logger.info(f"Inducing schema view for {self.handle}")
         sb = SchemaBuilder()


linkml_store/api/stores/mongodb/mongodb_collection.py
CHANGED

@@ -23,11 +23,15 @@ class MongoDBCollection(Collection):

     @property
     def mongo_collection(self) -> MongoCollection:
-
+        # collection_name = self.alias or self.name
+        collection_name = self.alias
+        if not collection_name:
             raise ValueError("Collection name not set")
-        collection_name = self.alias or self.name
         return self.parent.native_db[collection_name]

+    def _check_if_initialized(self) -> bool:
+        return self.alias in self.parent.native_db.list_collection_names()
+
     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
         if not isinstance(objs, list):
             objs = [objs]
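
_check_if_initialized relies on pymongo's list_collection_names(), which reports only collections that exist server-side, so a collection that has never received an insert reads as uninitialized. The same check in plain pymongo, as an illustrative sketch:

    from pymongo import MongoClient

    client = MongoClient()  # assumes a local mongod; illustrative only
    native_db = client["example_db"]

    def check_if_initialized(alias: str) -> bool:
        # mirrors MongoDBCollection._check_if_initialized
        return alias in native_db.list_collection_names()

    print(check_if_initialized("persons"))  # False until a first insert creates the collection
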

linkml_store/api/stores/mongodb/mongodb_database.py
CHANGED

@@ -3,9 +3,6 @@
 import logging
 from typing import Optional

-from linkml_runtime import SchemaView
-from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
-from linkml_runtime.utils.schema_builder import SchemaBuilder
 from pymongo import MongoClient
 from pymongo.database import Database as NativeDatabase

@@ -63,10 +60,9 @@ class MongoDBDatabase(Database):
         self._native_client.close()

     def drop(self, **kwargs):
-        self.native_client.drop_database(self.
+        self.native_client.drop_database(self.native_db.name)

     def query(self, query: Query, **kwargs) -> QueryResult:
-        # TODO: DRY
         if query.from_table:
             collection = self.get_collection(query.from_table)
             return collection.query(query, **kwargs)
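
Deriving the database name from the native handle avoids relying on a separately stored attribute; the equivalent calls in plain pymongo:

    from pymongo import MongoClient

    client = MongoClient()                 # illustrative local connection
    native_db = client["example_db"]
    client.drop_database(native_db.name)   # what MongoDBDatabase.drop now does
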
@@ -81,34 +77,3 @@
         if collection_name not in self._collections:
             collection = MongoDBCollection(name=collection_name, parent=self)
             self._collections[collection_name] = collection
-
-    def induce_schema_view(self) -> SchemaView:
-        logger.info(f"Inducing schema view for {self.handle}")
-        sb = SchemaBuilder()
-        schema = sb.schema
-
-        for collection_name in self.native_db.list_collection_names():
-            sb.add_class(collection_name)
-            mongo_collection = self.native_db[collection_name]
-            sample_doc = mongo_collection.find_one()
-            if sample_doc:
-                for field, value in sample_doc.items():
-                    if field == "_id":
-                        continue
-                    sd = SlotDefinition(field)
-                    if isinstance(value, list):
-                        sd.multivalued = True
-                    if isinstance(value, dict):
-                        sd.inlined = True
-                    sb.schema.classes[collection_name].attributes[sd.name] = sd
-
-        sb.add_defaults()
-        for cls_name in schema.classes:
-            if cls_name in self.metadata.collections:
-                collection_metadata = self.metadata.collections[cls_name]
-                if collection_metadata.attributes:
-                    del schema.classes[cls_name]
-                    cls = ClassDefinition(name=collection_metadata.type, attributes=collection_metadata.attributes)
-                    schema.classes[cls.name] = cls
-
-        return SchemaView(schema)

linkml_store/api/stores/solr/solr_collection.py
CHANGED

@@ -18,7 +18,7 @@ class SolrCollection(Collection):
     @property
     def _collection_base(self) -> str:
         if self.parent.use_cores:
-            base_url = f"{self.parent.base_url}/{self.name}"
+            base_url = f"{self.parent.base_url}/{self.alias}"
         else:
             base_url = self.parent.base_url
         return base_url
@@ -37,7 +37,7 @@ class SolrCollection(Collection):
         if not qfs:
             raise ValueError("No searchable slots configured for Solr collection")
         solr_query = self._build_solr_query(where, search_term=query, extra={"defType": index_name, "qf": qfs})
-        logger.info(f"Querying Solr collection {self.name} with query: {solr_query}")
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")

         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()
@@ -50,7 +50,7 @@ class SolrCollection(Collection):

     def query(self, query: Query, **kwargs) -> QueryResult:
         solr_query = self._build_solr_query(query)
-        logger.info(f"Querying Solr collection {self.name} with query: {solr_query}")
+        logger.info(f"Querying Solr collection {self.alias} with query: {solr_query}")

         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()
@@ -69,7 +69,7 @@ class SolrCollection(Collection):
         solr_query["facet.field"] = facet_columns
         solr_query["facet.limit"] = facet_limit

-        logger.info(f"Querying Solr collection {self.name} for facets with query: {solr_query}")
+        logger.info(f"Querying Solr collection {self.alias} for facets with query: {solr_query}")

         response = requests.get(f"{self._collection_base}/select", params=solr_query)
         response.raise_for_status()

linkml_store/cli.py
CHANGED

@@ -16,6 +16,7 @@ from linkml_store.index.implementations.simple_indexer import SimpleIndexer
 from linkml_store.index.indexer import Indexer
 from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output, write_output
 from linkml_store.utils.object_utils import object_path_update
+from linkml_store.utils.pandas_utils import facet_summary_to_dataframe_unmelted

 index_type_option = click.option(
     "--index-type",
@@ -87,6 +88,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
 @click.option("--set", help="Metadata settings in the form PATHEXPR=value", multiple=True)
 @click.option("-v", "--verbose", count=True)
 @click.option("-q", "--quiet/--no-quiet")
+@click.option("--base-dir", "-B", help="Base directory for the client configuration")
 @click.option(
     "--stacktrace/--no-stacktrace",
     default=False,
@@ -94,7 +96,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
     help="If set then show full stacktrace on error",
 )
 @click.pass_context
-def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set):
+def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set, **kwargs):
     """A CLI for interacting with the linkml-store."""
     if not stacktrace:
         sys.tracebacklimit = 0
@@ -117,7 +119,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     if quiet:
         logger.setLevel(logging.ERROR)
     ctx.ensure_object(dict)
-    client = Client().from_config(config) if config else Client()
+    client = Client().from_config(config, **kwargs) if config else Client()
     settings = ContextSettings(client=client, database_name=database, collection_name=collection)
     ctx.obj["settings"] = settings
     # DEPRECATED
@@ -150,7 +152,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     # raise ValueError("Collection must be specified if there are multiple collections.")
     if settings.database and settings.database.list_collections():
         collection = settings.database.list_collections()[0]
-        settings.collection_name = collection.name
+        settings.collection_name = collection.alias


 @cli.command()
@@ -180,15 +182,15 @@ def insert(ctx, files, object, format):
             objects = load_objects(file_path, format=format)
         else:
             objects = load_objects(file_path)
-        logger.info(f"Inserting {len(objects)} objects from {file_path} into collection '{collection.name}'.")
+        logger.info(f"Inserting {len(objects)} objects from {file_path} into collection '{collection.alias}'.")
         collection.insert(objects)
-        click.echo(f"Inserted {len(objects)} objects from {file_path} into collection '{collection.name}'.")
+        click.echo(f"Inserted {len(objects)} objects from {file_path} into collection '{collection.alias}'.")
     if object:
         for object_str in object:
             logger.info(f"Parsing: {object_str}")
             objects = yaml.safe_load(object_str)
             collection.insert(objects)
-            click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.name}'.")
+            click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.alias}'.")
     collection.commit()

@@ -324,7 +326,7 @@ def query(ctx, where, limit, output_type, output):
     """
     collection = ctx.obj["settings"].collection
     where_clause = yaml.safe_load(where) if where else None
-    query = Query(from_table=collection.name, where_clause=where_clause, limit=limit)
+    query = Query(from_table=collection.alias, where_clause=where_clause, limit=limit)
     result = collection.query(query)
     output_data = render_output(result.rows, output_type)
     if output:
@@ -341,7 +343,7 @@ def query(ctx, where, limit, output_type, output):
 def list_collections(ctx, **kwargs):
     db = ctx.obj["settings"].database
     for collection in db.list_collections(**kwargs):
-        click.echo(collection.name)
+        click.echo(collection.alias)
         click.echo(render_output(collection.metadata))

@@ -351,8 +353,9 @@ def list_collections(ctx, **kwargs):
 @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
 @click.option("--columns", "-S", help="Columns to facet on")
+@click.option("--wide/--no-wide", "-U/--no-U", default=False, show_default=True, help="Wide table")
 @click.pass_context
-def fq(ctx, where, limit, columns, output_type, output):
+def fq(ctx, where, limit, columns, output_type, wide, output):
     """
     Query facets from the specified collection.

@@ -379,11 +382,22 @@
             return "+".join([str(x) for x in key])
         return key

-
-
-
-
-
+    if wide:
+        results_obj = facet_summary_to_dataframe_unmelted(results)
+    else:
+        if output_type == Format.PYTHON.value:
+            results_obj = results
+        elif output_type in [Format.TSV.value, Format.CSV.value]:
+            results_obj = []
+            for fc, data in results.items():
+                for v, c in data:
+                    results_obj.append({"facet": fc, "value": v, "count": c})
+        else:
+            results_obj = {}
+            for key, value in results.items():
+                value_as_dict = {_untuple(v[0:-1]): v[-1] for v in value}
+                results_obj[_untuple(key)] = value_as_dict
+    output_data = render_output(results_obj, output_type)
     if output:
         with open(output, "w") as f:
             f.write(output_data)
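
The facet summary maps each facet column to (value, count) pairs; for TSV/CSV output it is flattened to one row per value. That transformation in isolation, with made-up data:

    results = {  # hypothetical facet summary
        "status": [("open", 12), ("closed", 3)],
        "type": [("bug", 9), ("feature", 6)],
    }
    rows = []
    for fc, data in results.items():
        for v, c in data:
            rows.append({"facet": fc, "value": v, "count": c})
    assert rows[0] == {"facet": "status", "value": "open", "count": 12}
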
@@ -403,14 +417,17 @@ def _get_index(index_type=None, **kwargs) -> Indexer:
 @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
 @click.option("--output-type", "-O", type=format_choice, default=Format.FORMATTED.value, help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
+@click.option(
+    "--limit", "-l", default=-1, show_default=True, type=click.INT, help="Maximum number of results to return"
+)
 @click.pass_context
-def describe(ctx, where, output_type, output):
+def describe(ctx, where, output_type, output, limit):
     """
     Describe the collection schema.
     """
     where_clause = yaml.safe_load(where) if where else None
     collection = ctx.obj["settings"].collection
-    df = collection.find(where_clause, limit=
+    df = collection.find(where_clause, limit=limit).rows_dataframe
     write_output(df.describe(include="all").transpose(), output_type, target=output)

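
describe now profiles up to --limit rows (default -1, i.e. all rows) via pandas; the pandas side in isolation:

    import pandas as pd

    df = pd.DataFrame([{"name": "a", "age": 30}, {"name": "b", "age": 40}])
    # same call chain the CLI applies to collection.find(...).rows_dataframe
    print(df.describe(include="all").transpose())
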
@@ -468,7 +485,7 @@ def search(ctx, search_term, where, limit, index_type, output_type, output, auto
     """Search objects in the specified collection."""
     collection = ctx.obj["settings"].collection
     ix = get_indexer(index_type)
-    logger.info(f"Attaching index to collection {collection.name}: {ix.model_dump()}")
+    logger.info(f"Attaching index to collection {collection.alias}: {ix.model_dump()}")
     collection.attach_indexer(ix, auto_index=auto_index)
     result = collection.search(search_term, where=where, limit=limit)
     output_data = render_output([{"score": row[0], **row[1]} for row in result.ranked_rows], output_type)
@@ -498,6 +515,7 @@ def indexes(ctx):
 def validate(ctx, output_type, output):
     """Validate objects in the specified collection."""
     collection = ctx.obj["settings"].collection
+    logger.info(f"Validating collection {collection.alias}")
     validation_results = [json_dumper.to_dict(x) for x in collection.iter_validate_collection()]
     output_data = render_output(validation_results, output_type)
     if output:
linkml_store/index/__init__.py
CHANGED

@@ -1,3 +1,14 @@
+"""
+Indexers package.
+
+Indexers allow indexes to be added to existing :class:`Collection` objects.
+
+Current two are supported:
+
+* simple: :class:`SimpleIndexer`
+* llm: :class:`LLMIndexer`
+"""
+
 from typing import Type

 from linkml_store.index.implementations.llm_indexer import LLMIndexer
@@ -14,7 +25,7 @@ def get_indexer_class(name: str) -> Type[Indexer]:
     """
     Get an indexer class by name.

-    :param name: the name of the indexer
+    :param name: the name of the indexer (simple, llm, ...)
     :return: the indexer class
     """
     if name not in INDEXER_CLASSES:
@@ -26,7 +37,10 @@ def get_indexer(index_type: str, **kwargs) -> Indexer:
     """
     Get an indexer by name.

-
+    >>> simple_indexer = get_indexer("simple")
+    >>> llm_indexer = get_indexer("llm")
+
+    :param name: the name of the indexer (simple, llm, ...)
     :param kwargs: additional arguments to pass to the indexer
     :return: the indexer
     """

linkml_store/index/implementations/llm_indexer.py
CHANGED

@@ -74,7 +74,7 @@ class LLMIndexer(Indexer):

         embeddings_client = Client()
         config = CollectionConfig(
-            name=coll_name,
+            alias=coll_name,
             type="Embeddings",
             attributes={
                 "text": {"range": "string"},
@@ -116,6 +116,7 @@ class LLMIndexer(Indexer):
             embeddings_collection.insert(
                 {"text": uncached_texts[i], "embedding": embeddings[index], "model_id": model_id}
             )
+            embeddings_collection.commit()
         else:
             logger.info(f"Embedding {len(texts)} texts")
             embeddings = model.embed_multi(texts)
linkml_store/index/indexer.py
CHANGED

@@ -11,11 +11,22 @@ logger = logging.getLogger(__name__)


 class TemplateSyntaxEnum(str, Enum):
+    """
+    Template syntax types.
+    """
+
     jinja2 = "jinja2"
     fstring = "fstring"


-def cosine_similarity(vector1, vector2):
+def cosine_similarity(vector1, vector2) -> float:
+    """
+    Calculate the cosine similarity between two vectors
+
+    :param vector1:
+    :param vector2:
+    :return:
+    """
     dot_product = np.dot(vector1, vector2)
     norm1 = np.linalg.norm(vector1)
     norm2 = np.linalg.norm(vector2)
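
The hunk ends before the return statement; the standard definition is dot(v1, v2) / (||v1|| * ||v2||). A complete, runnable version under that assumption:

    import numpy as np

    def cosine_similarity(vector1, vector2) -> float:
        # cos(theta) = dot(v1, v2) / (||v1|| * ||v2||)
        dot_product = np.dot(vector1, vector2)
        norm1 = np.linalg.norm(vector1)
        norm2 = np.linalg.norm(vector2)
        return dot_product / (norm1 * norm2)

    assert np.isclose(cosine_similarity([1, 0], [1, 0]), 1.0)  # parallel vectors
    assert np.isclose(cosine_similarity([1, 0], [0, 1]), 0.0)  # orthogonal vectors
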
@@ -24,7 +35,7 @@ def cosine_similarity(vector1, vector2):

 class Indexer(BaseModel):
     """
-    An
+    An indexer operates on a collection in order to search for objects.
     """

     name: Optional[str] = None

linkml_store/utils/file_utils.py
ADDED

@@ -0,0 +1,37 @@
+import logging
+import shutil
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+# Set up logging
+logger = logging.getLogger(__name__)
+
+
+def safe_remove_directory(dir_path: Path, no_backup: bool = False) -> Optional[Path]:
+    # Ensure the directory exists
+    if not dir_path.exists():
+        raise FileNotFoundError(f"Directory does not exist: {dir_path}")
+    try:
+        if no_backup:
+            # Move to a temporary directory instead of permanent removal
+            with tempfile.TemporaryDirectory() as tmpdir:
+                tmp_path = Path(tmpdir) / dir_path.name
+                shutil.move(str(dir_path), str(tmp_path))
+                logger.info(f"Directory moved to temporary location: {tmp_path}")
+                # The directory will be automatically removed when exiting the context manager
+            return None
+        else:
+            # Create a backup directory name with timestamp
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            backup_dir = dir_path.with_name(f"{dir_path.name}_backup_{timestamp}")
+
+            # Move the directory to the backup location
+            shutil.move(str(dir_path), str(backup_dir))
+            logger.info(f"Directory backed up to: {backup_dir}")
+            return backup_dir
+
+    except Exception as e:
+        logger.error(f"An error occurred: {e}")
+        return None
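
Despite the name, no_backup=True still moves the directory, but into a TemporaryDirectory that is deleted on context exit; the default path renames it in place with a timestamp suffix. A usage sketch with an illustrative path:

    from pathlib import Path
    from linkml_store.utils.file_utils import safe_remove_directory

    target = Path("/tmp/demo_store")  # illustrative
    target.mkdir(exist_ok=True)
    backup = safe_remove_directory(target)  # e.g. /tmp/demo_store_backup_20240101_120000
    target.mkdir(exist_ok=True)
    assert safe_remove_directory(target, no_backup=True) is None  # removed outright
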

linkml_store/utils/format_utils.py
CHANGED

@@ -7,8 +7,10 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, TextIO, Type, Union

 import pandas as pd
+import pystow
 import yaml
 from pydantic import BaseModel
+from tabulate import tabulate


 class Format(Enum):
@@ -21,12 +23,40 @@
     YAML = "yaml"
     TSV = "tsv"
     CSV = "csv"
+    PYTHON = "python"
     PARQUET = "parquet"
     FORMATTED = "formatted"
+    TABLE = "table"
+
+
+def load_objects_from_url(
+    url: str,
+    format: Union[Format, str] = None,
+    expected_type: Type = None,
+    local_path: Optional[str] = None,
+    **kwargs,
+) -> List[Dict[str, Any]]:
+    """
+    Load objects from a URL in JSON, JSONLines, YAML, CSV, or TSV format.
+
+    :param url: The URL to the file.
+    :param format: The format of the file. Can be a Format enum or a string value.
+    :param expected_type: The target type to load the objects into.
+    :param local_path: The local path to save the file to.
+    :return: A list of dictionaries representing the loaded objects.
+    """
+    local_path = pystow.ensure("linkml", "linkml-store", url=url)
+    objs = load_objects(local_path, format=format, expected_type=expected_type, **kwargs)
+    if not objs:
+        raise ValueError(f"No objects loaded from URL: {url}")
+    return objs


 def load_objects(
-    file_path: Union[str, Path],
+    file_path: Union[str, Path],
+    format: Union[Format, str] = None,
+    expected_type: Type = None,
+    header_comment_token: Optional[str] = None,
 ) -> List[Dict[str, Any]]:
     """
     Load objects from a file in JSON, JSONLines, YAML, CSV, or TSV format.
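
load_objects_from_url downloads via pystow (cached under ~/.data/linkml/linkml-store by default) and then defers to load_objects; note that the local_path parameter is immediately overwritten by the pystow cache path. A usage sketch with an illustrative URL:

    from linkml_store.utils.format_utils import load_objects_from_url

    objs = load_objects_from_url("https://example.org/data/persons.csv", format="csv")
    print(len(objs), objs[0])
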
@@ -37,7 +67,7 @@ def load_objects(

     :param file_path: The path to the file.
     :param format: The format of the file. Can be a Format enum or a string value.
-    :param expected_type: The target type to load the objects into.
+    :param expected_type: The target type to load the objects into, e.g. list
     :return: A list of dictionaries representing the loaded objects.
     """
     if isinstance(format, str):
@@ -48,6 +78,12 @@ def load_objects(

     if not format and (file_path.endswith(".parquet") or file_path.endswith(".pq")):
         format = Format.PARQUET
+    if not format and file_path.endswith(".tsv"):
+        format = Format.TSV
+    if not format and file_path.endswith(".csv"):
+        format = Format.CSV
+    if not format and file_path.endswith(".py"):
+        format = Format.PYTHON

     mode = "r"
     if format == Format.PARQUET:
@@ -68,11 +104,29 @@ def load_objects(
             objs = list(yaml.safe_load_all(f))
         else:
             objs = yaml.safe_load(f)
-    elif format == Format.TSV or format == Format.CSV:
-
-
-
-
+    elif format == Format.TSV or format == Format.CSV:
+        # Skip initial comment lines if comment_char is set
+        if header_comment_token:
+            # Store the original position
+            original_pos = f.tell()
+
+            # Read and store lines until we find a non-comment line
+            lines = []
+            for line in f:
+                if not line.startswith(header_comment_token):
+                    break
+                lines.append(line)
+
+            # Go back to the original position
+            f.seek(original_pos)
+
+            # Skip the comment lines we found
+            for _ in lines:
+                f.readline()
+        if format == Format.TSV:
+            reader = csv.DictReader(f, delimiter="\t")
+        else:
+            reader = csv.DictReader(f)
         objs = list(reader)
     elif format == Format.PARQUET:
         import pyarrow.parquet as pq
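
With header_comment_token set, leading lines that start with the token are consumed before csv.DictReader sees the header row. A self-contained sketch using an illustrative file:

    from linkml_store.utils.format_utils import load_objects

    with open("example.tsv", "w") as f:
        f.write("## generated by some pipeline\n")  # preamble line to be skipped
        f.write("name\tage\n")
        f.write("a\t30\n")

    objs = load_objects("example.tsv", header_comment_token="##")
    assert objs == [{"name": "a", "age": "30"}]
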
@@ -151,6 +205,9 @@ def render_output(
     if isinstance(data, pd.DataFrame):
         data = data.to_dict(orient="records")

+    if isinstance(data, dict) and format in [Format.TSV, Format.CSV]:
+        data = [data]
+
     if isinstance(data, BaseModel):
         data = data.model_dump()

@@ -158,6 +215,10 @@ def render_output(
         return json.dumps(data, indent=2, default=str)
     elif format == Format.JSONL:
         return "\n".join(json.dumps(obj) for obj in data)
+    elif format == Format.PYTHON:
+        return str(data)
+    elif format == Format.TABLE:
+        return tabulate(pd.DataFrame(data), headers="keys", tablefmt="psql")
     elif format == Format.YAML:
         if isinstance(data, list):
             return yaml.safe_dump_all(data, sort_keys=False)
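
The new table format renders data through a DataFrame and tabulate's psql style; in isolation:

    import pandas as pd
    from tabulate import tabulate

    data = [{"name": "a", "age": 30}, {"name": "b", "age": 40}]
    # prints a +----+ bordered table, including the DataFrame index column
    print(tabulate(pd.DataFrame(data), headers="keys", tablefmt="psql"))
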