linkml-store 0.1.7__py3-none-any.whl → 0.1.9__py3-none-any.whl

This diff shows the content of publicly released package versions as they appear in their public registries, and is provided for informational purposes only.

Potentially problematic release: this version of linkml-store might be problematic.

Files changed (32)
  1. linkml_store/api/client.py +32 -5
  2. linkml_store/api/collection.py +276 -27
  3. linkml_store/api/config.py +6 -2
  4. linkml_store/api/database.py +264 -21
  5. linkml_store/api/stores/chromadb/__init__.py +5 -1
  6. linkml_store/api/stores/duckdb/__init__.py +9 -0
  7. linkml_store/api/stores/duckdb/duckdb_collection.py +7 -4
  8. linkml_store/api/stores/duckdb/duckdb_database.py +19 -5
  9. linkml_store/api/stores/duckdb/mappings.py +1 -0
  10. linkml_store/api/stores/filesystem/__init__.py +15 -0
  11. linkml_store/api/stores/filesystem/filesystem_collection.py +177 -0
  12. linkml_store/api/stores/filesystem/filesystem_database.py +72 -0
  13. linkml_store/api/stores/hdf5/__init__.py +7 -0
  14. linkml_store/api/stores/mongodb/__init__.py +25 -0
  15. linkml_store/api/stores/mongodb/mongodb_collection.py +31 -10
  16. linkml_store/api/stores/mongodb/mongodb_database.py +13 -2
  17. linkml_store/api/types.py +4 -0
  18. linkml_store/cli.py +150 -15
  19. linkml_store/index/__init__.py +6 -2
  20. linkml_store/index/implementations/llm_indexer.py +83 -5
  21. linkml_store/index/implementations/simple_indexer.py +2 -2
  22. linkml_store/index/indexer.py +32 -8
  23. linkml_store/utils/change_utils.py +17 -0
  24. linkml_store/utils/format_utils.py +139 -8
  25. linkml_store/utils/patch_utils.py +126 -0
  26. linkml_store/utils/query_utils.py +89 -0
  27. {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/METADATA +7 -1
  28. linkml_store-0.1.9.dist-info/RECORD +49 -0
  29. linkml_store-0.1.7.dist-info/RECORD +0 -42
  30. {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/LICENSE +0 -0
  31. {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/WHEEL +0 -0
  32. {linkml_store-0.1.7.dist-info → linkml_store-0.1.9.dist-info}/entry_points.txt +0 -0
linkml_store/api/stores/filesystem/filesystem_collection.py ADDED
@@ -0,0 +1,177 @@
+ import logging
+ from pathlib import Path
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ from linkml_store.api import Collection
+ from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+ from linkml_store.api.queries import Query, QueryResult
+ from linkml_store.api.types import DatabaseType
+ from linkml_store.utils.query_utils import mongo_query_to_match_function
+
+ logger = logging.getLogger(__name__)
+
+
+ class FileSystemCollection(Collection[DatabaseType]):
+     path: Optional[Path] = None
+     file_format: Optional[str] = None
+     encoding: Optional[str] = None
+     _objects_list: List[OBJECT] = None
+     _object_map: Dict[str, OBJECT] = None
+
+     def __init__(self, **kwargs):
+         super().__init__(**kwargs)
+         parent: DatabaseType = self.parent
+         if not self.path:
+             if self.parent:
+                 self.path = Path(parent.directory_path)
+         self._objects_list = []
+         self._object_map = {}
+         if not self.file_format:
+             self.file_format = "json"
+
+     @property
+     def path_to_file(self):
+         return Path(self.parent.directory_path) / f"{self.name}.{self.file_format}"
+
+     @property
+     def objects_as_list(self) -> List[OBJECT]:
+         if self._object_map:
+             return list(self._object_map.values())
+         else:
+             return self._objects_list
+
+     def _set_objects(self, objs: List[OBJECT]):
+         pk = self.identifier_attribute_name
+         if pk:
+             self._object_map = {obj[pk]: obj for obj in objs}
+             self._objects_list = []
+         else:
+             self._objects_list = objs
+             self._object_map = {}
+
+     def commit(self):
+         path = self.path_to_file
+         if not path:
+             raise ValueError("Path not set")
+         path.parent.mkdir(parents=True, exist_ok=True)
+         self._save(path)
+
+     def _save(self, path: Path):
+         encoding = self.encoding or "utf-8"
+         fmt = self.file_format or "json"
+         mode = "w"
+         if fmt == "parquet":
+             mode = "wb"
+             encoding = None
+         with open(path, mode, encoding=encoding) as stream:
+             if fmt == "json":
+                 import json
+
+                 json.dump(self.objects_as_list, stream, indent=2)
+             elif fmt == "jsonl":
+                 import jsonlines
+
+                 writer = jsonlines.Writer(stream)
+                 writer.write_all(self.objects_as_list)
+             elif fmt == "yaml":
+                 import yaml
+
+                 yaml.dump_all(self.objects_as_list, stream)
+             elif fmt == "parquet":
+                 import pandas as pd
+                 import pyarrow
+                 import pyarrow.parquet as pq
+
+                 df = pd.DataFrame(self.objects_as_list)
+                 table = pyarrow.Table.from_pandas(df)
+                 pq.write_table(table, stream)
+             elif fmt in {"csv", "tsv"}:
+                 import csv
+
+                 delimiter = "\t" if fmt == "tsv" else ","
+                 # header is the ordered union of keys across all objects
+                 fieldnames = list(self.objects_as_list[0].keys())
+                 for obj in self.objects_as_list[1:]:
+                     fieldnames.extend([k for k in obj.keys() if k not in fieldnames])
+                 writer = csv.DictWriter(stream, fieldnames=fieldnames, delimiter=delimiter)
+                 writer.writeheader()
+                 for obj in self.objects_as_list:
+                     writer.writerow(obj)
+             else:
+                 raise ValueError(f"Unsupported file format: {fmt}")
+
+     def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+         if not isinstance(objs, list):
+             objs = [objs]
+         if not objs:
+             return
+         pk = self.identifier_attribute_name
+         if pk:
+             for obj in objs:
+                 if pk not in obj:
+                     raise ValueError(f"Primary key {pk} not found in object {obj}")
+                 pk_val = obj[pk]
+                 self._object_map[pk_val] = obj
+         else:
+             self._objects_list.extend(objs)
+
+     def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
+         if not isinstance(objs, list):
+             objs = [objs]
+         if not objs:
+             return 0
+         pk = self.identifier_attribute_name
+         n = 0
+         if pk:
+             for obj in objs:
+                 pk_val = obj[pk]
+                 if pk_val in self._object_map:
+                     del self._object_map[pk_val]
+                     n += 1
+         else:
+             # count removals by comparing list length before and after filtering
+             n = len(self._objects_list)
+             self._objects_list = [o for o in self._objects_list if o not in objs]
+             n -= len(self._objects_list)
+         return n
+
+     def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
+         logger.info(f"Deleting from {self.target_class_name} where: {where}")
+         if where is None:
+             where = {}
+
+         def matches(obj: OBJECT):
+             for k, v in where.items():
+                 if obj.get(k) != v:
+                     return False
+             return True
+
+         curr_objects = [o for o in self.objects_as_list if not matches(o)]
+         n_deleted = len(self.objects_as_list) - len(curr_objects)
+         self._set_objects(curr_objects)
+         return n_deleted
+
+     def query(self, query: Query, **kwargs) -> QueryResult:
+         where = query.where_clause or {}
+         match = mongo_query_to_match_function(where)
+         rows = [o for o in self.objects_as_list if match(o)]
+         count = len(rows)
+         return QueryResult(query=query, num_rows=count, rows=rows)
+
+     def query_facets(
+         self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
+     ) -> Dict[str, List[Tuple[Any, int]]]:
+         match = mongo_query_to_match_function(where)
+         rows = [o for o in self.objects_as_list if match(o)]
+         if not facet_columns:
+             facet_columns = self.class_definition().attributes.keys()
+         facet_results = {c: {} for c in facet_columns}
+         for row in rows:
+             for fc in facet_columns:
+                 if fc in row:
+                     v = row[fc]
+                     if v not in facet_results[fc]:
+                         facet_results[fc][v] = 1
+                     else:
+                         facet_results[fc][v] += 1
+         return {fc: list(facet_results[fc].items()) for fc in facet_results}
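The collection, then, is an in-memory list (or identifier-keyed map) that is serialized on commit(). A minimal usage sketch, assuming a writable ./data directory and constructing the collection directly with parent/name keyword arguments, as FileSystemDatabase.init_collections does in the next file; the collection name and objects are illustrative:

    from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
    from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase

    db = FileSystemDatabase(handle="file:data")
    coll = FileSystemCollection(parent=db, name="persons")
    coll.insert([{"id": "P1", "occupation": "Architect"},
                 {"id": "P2", "occupation": "Baker"}])
    coll.commit()  # writes ./data/persons.json, since file_format defaults to "json"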
linkml_store/api/stores/filesystem/filesystem_database.py ADDED
@@ -0,0 +1,72 @@
+ import logging
+ from pathlib import Path
+ from typing import Optional
+
+ import yaml
+ from linkml.utils.schema_builder import SchemaBuilder
+ from linkml_runtime import SchemaView
+
+ from linkml_store.api import Database
+ from linkml_store.api.config import DatabaseConfig
+ from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
+ from linkml_store.utils.format_utils import Format, load_objects
+
+ logger = logging.getLogger(__name__)
+
+
+ class FileSystemDatabase(Database):
+     collection_class = FileSystemCollection
+
+     directory_path: Optional[Path] = None
+     default_file_format: Optional[str] = None
+
+     def __init__(self, handle: Optional[str] = None, **kwargs):
+         handle = handle.replace("file:", "")
+         if handle.startswith("//"):
+             handle = handle[2:]
+         self.directory_path = Path(handle)
+         self.load_metadata()
+         super().__init__(handle=handle, **kwargs)
+
+     @property
+     def metadata_path(self) -> Path:
+         return self.directory_path / ".linkml_metadata.yaml"
+
+     def load_metadata(self):
+         if self.metadata_path.exists():
+             with open(self.metadata_path) as stream:
+                 md_dict = yaml.safe_load(stream)
+             metadata = DatabaseConfig(**md_dict)
+         else:
+             metadata = DatabaseConfig()
+         self.metadata = metadata
+
+     def close(self, **kwargs):
+         pass
+
+     def init_collections(self):
+         metadata = self.metadata
+         if self._collections is None:
+             self._collections = {}
+         for name, collection_config in metadata.collections.items():
+             collection = FileSystemCollection(parent=self, **collection_config.dict())
+             self._collections[name] = collection
+         path = self.directory_path
+         if path.exists():
+             for fmt in Format:
+                 suffix = fmt.value
+                 logger.info(f"Looking for {suffix} files in {path}")
+                 for f in path.glob(f"*.{suffix}"):
+                     logger.info(f"Found {f}")
+                     n = f.stem
+                     objs = load_objects(f, suffix, expected_type=list)
+                     collection = FileSystemCollection(parent=self, name=n)
+                     self._collections[n] = collection
+                     collection._set_objects(objs)
+
+     def induce_schema_view(self) -> SchemaView:
+         logger.info(f"Inducing schema view for {self.handle}")
+         sb = SchemaBuilder()
+
+         for collection_name in self.list_collection_names():
+             sb.add_class(collection_name)
+         return SchemaView(sb.schema)
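The constructor accepts handles in either file:path or file://path form; a quick standalone sketch of the normalization performed above, with illustrative values:

    def normalize(handle: str) -> str:
        # mirrors FileSystemDatabase.__init__
        handle = handle.replace("file:", "")
        if handle.startswith("//"):
            handle = handle[2:]
        return handle

    assert normalize("file:///tmp/db") == "/tmp/db"
    assert normalize("file:.") == "."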
linkml_store/api/stores/hdf5/__init__.py ADDED
@@ -0,0 +1,7 @@
+ """
+ Adapter for HDF5 file storage.
+
+ .. warning::
+
+     Experimental support for HDF5 storage.
+ """
linkml_store/api/stores/mongodb/__init__.py ADDED
@@ -0,0 +1,25 @@
+ """
+ Adapter for MongoDB document store.
+
+ Handles have the form: ``mongodb://<host>:<port>/<database>``
+
+ To use this, you must have the `pymongo` extra installed.
+
+ .. code-block:: bash
+
+     pip install linkml-store[mongodb]
+
+ or
+
+ .. code-block:: bash
+
+     pip install linkml-store[all]
+ """
+
+ from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
+ from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
+
+ __all__ = [
+     "MongoDBCollection",
+     "MongoDBDatabase",
+ ]
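A minimal usage sketch, assuming a MongoDB server on localhost and the pymongo extra installed; the collection name and document are illustrative:

    from linkml_store.api.queries import Query
    from linkml_store.api.stores.mongodb import MongoDBDatabase

    db = MongoDBDatabase(handle="mongodb://localhost:27017/test")
    coll = db.get_collection("persons")
    coll.insert({"id": "P1", "occupation": "Architect"})
    result = coll.query(Query(from_table="persons", where_clause={"occupation": "Architect"}))
    print(result.num_rows)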
linkml_store/api/stores/mongodb/mongodb_collection.py CHANGED
@@ -13,22 +13,36 @@ logger = logging.getLogger(__name__)
  
  
  class MongoDBCollection(Collection):
+     """
+     Adapter for collections in a MongoDB database.
+
+     .. note::
+
+         You should not use or manipulate this class directly.
+         Instead, use the general :class:`linkml_store.api.Collection`
+     """
  
      @property
      def mongo_collection(self) -> MongoCollection:
          if not self.name:
              raise ValueError("Collection name not set")
-         return self.parent.native_db[self.name]
+         collection_name = self.alias or self.name
+         return self.parent.native_db[collection_name]
  
      def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
          if not isinstance(objs, list):
              objs = [objs]
          self.mongo_collection.insert_many(objs)
+         # TODO: allow mapping of _id to id for efficiency
+         for obj in objs:
+             del obj["_id"]
+         self._post_insert_hook(objs)
  
-     def query(self, query: Query, **kwargs) -> QueryResult:
+     def query(self, query: Query, limit: Optional[int] = None, **kwargs) -> QueryResult:
          mongo_filter = self._build_mongo_filter(query.where_clause)
-         if query.limit:
-             cursor = self.mongo_collection.find(mongo_filter).limit(query.limit)
+         limit = limit or query.limit
+         if limit and limit >= 0:
+             cursor = self.mongo_collection.find(mongo_filter).limit(limit)
          else:
              cursor = self.mongo_collection.find(mongo_filter)
  
@@ -62,24 +76,31 @@ class MongoDBCollection(Collection):
          if isinstance(col, tuple):
              sd = SlotDefinition(name="PLACEHOLDER")
          else:
-             sd = cd.attributes[col]
-
-         if sd.multivalued:
+             if col in cd.attributes:
+                 sd = cd.attributes[col]
+             else:
+                 logger.info(f"No schema metadata for {col}")
+                 sd = SlotDefinition(name=col)
+         group = {"$group": {"_id": f"${col}", "count": {"$sum": 1}}}
+         if isinstance(col, tuple):
+             q = {k.replace(".", ""): f"${k}" for k in col}
+             group["$group"]["_id"] = q
+         if sd and sd.multivalued:
              facet_pipeline = [
                  {"$match": where} if where else {"$match": {}},
                  {"$unwind": f"${col}"},
-                 {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
+                 group,
                  {"$sort": {"count": -1}},
                  {"$limit": facet_limit},
              ]
          else:
              facet_pipeline = [
                  {"$match": where} if where else {"$match": {}},
-                 {"$group": {"_id": f"${col}", "count": {"$sum": 1}}},
+                 group,
                  {"$sort": {"count": -1}},
                  {"$limit": facet_limit},
              ]
-
+         logger.info(f"Facet pipeline: {facet_pipeline}")
          facet_results = list(self.mongo_collection.aggregate(facet_pipeline))
          results[col] = [(result["_id"], result["count"]) for result in facet_results]
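For a multivalued column the pipeline unwinds the array before grouping, so each element is counted separately. An illustrative pipeline for a hypothetical multivalued column "aliases", with no where clause and an assumed facet limit of 100:

    facet_pipeline = [
        {"$match": {}},
        {"$unwind": "$aliases"},  # one document per array element
        {"$group": {"_id": "$aliases", "count": {"$sum": 1}}},
        {"$sort": {"count": -1}},  # most frequent values first
        {"$limit": 100},
    ]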
linkml_store/api/stores/mongodb/mongodb_database.py CHANGED
@@ -29,9 +29,17 @@ class MongoDBDatabase(Database):
  
      def __init__(self, handle: Optional[str] = None, **kwargs):
          if handle is None:
-             handle = "mongodb://localhost:27017"
+             handle = "mongodb://localhost:27017/test"
          super().__init__(handle=handle, **kwargs)
  
+     @property
+     def _db_name(self) -> str:
+         if self.handle:
+             db = self.handle.split("/")[-1]
+         else:
+             db = "default"
+         return db
+
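The database name is simply the last /-separated segment of the handle, which is also why the default handle above gained a trailing /test; without it, the host and port segment would be picked up:

    "mongodb://localhost:27017/test".split("/")[-1]  # -> "test"
    "mongodb://localhost:27017".split("/")[-1]       # -> "localhost:27017"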
      @property
      def native_client(self) -> MongoClient:
          if self._native_client is None:
@@ -44,7 +52,7 @@ class MongoDBDatabase(Database):
          alias = self.metadata.alias
          if not alias:
              alias = "default"
-         self._native_db = self.native_client[alias]
+         self._native_db = self.native_client[self._db_name]
          return self._native_db
  
      def commit(self, **kwargs):
@@ -58,9 +66,12 @@ class MongoDBDatabase(Database):
          self.native_client.drop_database(self.metadata.alias)
  
      def query(self, query: Query, **kwargs) -> QueryResult:
+         # TODO: DRY
          if query.from_table:
              collection = self.get_collection(query.from_table)
              return collection.query(query, **kwargs)
+         else:
+             raise NotImplementedError(f"Querying without a table is not supported in {self.__class__.__name__}")
  
      def init_collections(self):
          if self._collections is None:
linkml_store/api/types.py ADDED
@@ -0,0 +1,4 @@
+ from typing import TypeVar
+
+ DatabaseType = TypeVar("DatabaseType", bound="Database")  # noqa: F821
+ CollectionType = TypeVar("CollectionType", bound="Collection")  # noqa: F821
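These bound TypeVars let an adapter parameterize the generic Collection with its concrete Database class, as FileSystemCollection(Collection[DatabaseType]) does above. A minimal sketch of the pattern; every name other than the TypeVar is illustrative:

    from typing import Generic, TypeVar

    DatabaseType = TypeVar("DatabaseType", bound="Database")

    class Database:
        ...

    class Collection(Generic[DatabaseType]):
        parent: DatabaseType  # typed as the concrete Database subclass

    class MyDatabase(Database):
        ...

    class MyCollection(Collection[MyDatabase]):
        ...  # type checkers now see self.parent as MyDatabase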
linkml_store/cli.py CHANGED
@@ -11,12 +11,19 @@ from pydantic import BaseModel
  from linkml_store import Client
  from linkml_store.api import Collection, Database
  from linkml_store.api.queries import Query
+ from linkml_store.index import get_indexer
  from linkml_store.index.implementations.simple_indexer import SimpleIndexer
  from linkml_store.index.indexer import Indexer
- from linkml_store.utils.format_utils import Format, load_objects, render_output
+ from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output, write_output
  from linkml_store.utils.object_utils import object_path_update
  
- index_type_option = click.option("--index-type", "-t")
+ index_type_option = click.option(
+     "--index-type",
+     "-t",
+     default="simple",
+     show_default=True,
+     help="Type of index to create. Values: simple, llm",
+ )
  
  logger = logging.getLogger(__name__)
  
@@ -70,6 +77,9 @@ class ContextSettings(BaseModel):
  format_choice = click.Choice([f.value for f in Format])
  
  
+ include_internal_option = click.option("--include-internal/--no-include-internal", default=False, show_default=True)
+
+
  @click.group()
  @click.option("--database", "-d", help="Database name")
  @click.option("--collection", "-c", help="Collection name")
@@ -89,6 +99,15 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
      if not stacktrace:
          sys.tracebacklimit = 0
      logger = logging.getLogger()
+     # Set handler for the root logger to output to the console
+     console_handler = logging.StreamHandler()
+     console_handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
+
+     # Clear existing handlers to avoid duplicate messages if function runs multiple times
+     logger.handlers = []
+
+     # Add the newly created console handler to the logger
+     logger.addHandler(console_handler)
      if verbose >= 2:
          logger.setLevel(logging.DEBUG)
      elif verbose == 1:
@@ -162,6 +181,7 @@ def insert(ctx, files, object, format):
          objects = yaml.safe_load(object_str)
          collection.insert(objects)
          click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.name}'.")
+     collection.commit()
  
  
  @cli.command()
@@ -193,14 +213,107 @@ def store(ctx, files, object, format):
      click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{db.name}'.")
  
  
+ @cli.command(name="import")
+ @click.option("--format", "-f", help="Input format")
+ @click.pass_context
+ @click.argument("files", type=click.Path(exists=True), nargs=-1)
+ def import_database(ctx, files, format):
+     """Imports a database from a dump."""
+     settings = ctx.obj["settings"]
+     db = settings.database
+     if not files:
+         files = ["-"]
+     for file_path in files:
+         db.import_database(file_path, source_format=format)
+
+
  @cli.command()
- @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
+ @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+ @click.option("--output", "-o", required=True, type=click.Path(), help="Output file path")
+ @click.pass_context
+ def export(ctx, output_type, output):
+     """Exports a database to a dump."""
+     settings = ctx.obj["settings"]
+     db = settings.database
+     if output_type is None:
+         output_type = guess_format(output)
+     if output_type is None:
+         raise ValueError(f"Output format must be specified; it can't be inferred from {output}.")
+     db.export_database(output, target_format=output_type)
+
+
+ @cli.command()
+ @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+ @click.option("--other-database", "-D", required=False, help="Path to the other database")
+ @click.option("--other-collection", "-X", required=True, help="Name of the other collection")
+ @click.option("--identifier-attribute", "-I", required=False, help="Primary key name")
+ @click.pass_context
+ def diff(ctx, output, output_type, other_database, other_collection, identifier_attribute):
+     """Diffs two collections to create a patch."""
+     settings = ctx.obj["settings"]
+     db = settings.database
+     collection = settings.collection
+     if not collection:
+         raise ValueError("Collection must be specified.")
+     other_db = settings.client.get_database(other_database) if other_database else db
+     other_collection = other_db.get_collection(other_collection)
+     if identifier_attribute:
+         collection.set_identifier_attribute_name(identifier_attribute)
+         other_collection.set_identifier_attribute_name(identifier_attribute)
+     diff = collection.diff(other_collection)
+     write_output(diff, output_type, target=output)
+
+
+ @cli.command()
+ @click.option("--identifier-attribute", "-I", required=False, help="Primary key name")
+ @click.argument("patch_files", type=click.Path(exists=True), nargs=-1)
+ @click.pass_context
+ def apply(ctx, patch_files, identifier_attribute):
+     """
+     Apply a patch to a collection.
+     """
+     settings = ctx.obj["settings"]
+     collection = settings.collection
+     if not collection:
+         raise ValueError("Collection must be specified.")
+     if identifier_attribute:
+         collection.set_identifier_attribute_name(identifier_attribute)
+     for patch_file in patch_files:
+         patch_objs = load_objects(patch_file, expected_type=list)
+         collection.apply_patches(patch_objs)
+
+
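Together, diff and apply support a patch-based round trip between two collections. An illustrative invocation using the options defined above (database handles, collection names, and file names are hypothetical):

    linkml-store -d duckdb:///old.db -c persons diff -D duckdb:///new.db -X persons -I id -o patch.yaml
    linkml-store -d duckdb:///old.db -c persons apply -I id patch.yaml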
+ @cli.command()
+ @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query, as YAML")
  @click.option("--limit", "-l", type=click.INT, help="Maximum number of results to return")
  @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
  @click.option("--output", "-o", type=click.Path(), help="Output file path")
  @click.pass_context
  def query(ctx, where, limit, output_type, output):
-     """Query objects from the specified collection."""
+     """Query objects from the specified collection.
+
+     Leave the query field blank to return all objects in the collection.
+
+     Examples:
+
+         linkml-store -d duckdb:///countries.db -c countries query
+
+     Queries can be specified in YAML, as basic key-value pairs
+
+     Examples:
+
+         linkml-store -d duckdb:///countries.db -c countries query -w 'code: NZ'
+
+     More complex queries can be specified using MongoDB-style query syntax
+
+     Examples:
+
+         linkml-store -d file:. -c persons query -w 'occupation: {$ne: Architect}'
+
+     Finds all people who are not architects.
+     """
      collection = ctx.obj["settings"].collection
      where_clause = yaml.safe_load(where) if where else None
      query = Query(from_table=collection.name, where_clause=where_clause, limit=limit)
@@ -216,9 +329,10 @@ def query(ctx, where, limit, output_type, output):
  
  @cli.command()
  @click.pass_context
- def list_collections(ctx):
+ @include_internal_option
+ def list_collections(ctx, **kwargs):
      db = ctx.obj["settings"].database
-     for collection in db.list_collections():
+     for collection in db.list_collections(**kwargs):
          click.echo(collection.name)
          click.echo(render_output(collection.metadata))
  
@@ -254,7 +368,7 @@ def fq(ctx, where, limit, columns, output_type, output):
  
      def _untuple(key):
          if isinstance(key, tuple):
-             return "+".join(key)
+             return "+".join([str(x) for x in key])
          return key
  
      count_dict = {}
@@ -277,19 +391,34 @@ def _get_index(index_type=None, **kwargs) -> Indexer:
          raise ValueError(f"Unknown index type: {index_type}")
  
  
+ @cli.command()
+ @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
+ @click.option("--output-type", "-O", type=format_choice, default=Format.FORMATTED.value, help="Output format")
+ @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ @click.pass_context
+ def describe(ctx, where, output_type, output):
+     """
+     Describe the collection schema.
+     """
+     where_clause = yaml.safe_load(where) if where else None
+     collection = ctx.obj["settings"].collection
+     df = collection.find(where_clause, limit=1).rows_dataframe
+     write_output(df.describe(include="all").transpose(), output_type, target=output)
+
+
  @cli.command()
  @index_type_option
+ @click.option("--cached-embeddings-database", "-E", help="Path to the database where embeddings are cached")
+ @click.option("--text-template", "-T", help="Template for text embeddings")
  @click.pass_context
- def index(ctx, index_type):
+ def index(ctx, index_type, **kwargs):
      """
      Create an index over a collection.
  
-     :param ctx:
-     :param index_type:
-     :return:
+     By default a simple trigram index is used.
      """
      collection = ctx.obj["settings"].collection
-     ix = _get_index(index_type)
+     ix = get_indexer(index_type, **kwargs)
      collection.attach_indexer(ix)
  
@@ -322,14 +451,17 @@ def schema(ctx, output_type, output):
  @click.option("--limit", "-l", type=click.INT, help="Maximum number of search results")
  @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
  @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ @click.option(
+     "--auto-index/--no-auto-index", default=False, show_default=True, help="Automatically index the collection"
+ )
  @index_type_option
  @click.pass_context
- def search(ctx, search_term, where, limit, index_type, output_type, output):
+ def search(ctx, search_term, where, limit, index_type, output_type, output, auto_index):
      """Search objects in the specified collection."""
      collection = ctx.obj["settings"].collection
-     ix = _get_index(index_type)
+     ix = get_indexer(index_type)
      logger.info(f"Attaching index to collection {collection.name}: {ix.model_dump()}")
-     collection.attach_indexer(ix, auto_index=False)
+     collection.attach_indexer(ix, auto_index=auto_index)
      result = collection.search(search_term, where=where, limit=limit)
      output_data = render_output([{"score": row[0], **row[1]} for row in result.ranked_rows], output_type)
      if output:
@@ -343,6 +475,9 @@ def search(ctx, search_term, where, limit, index_type, output_type, output):
  
  @cli.command()
  @click.pass_context
  def indexes(ctx):
+     """
+     Show the indexes for a collection.
+     """
      collection = ctx.obj["settings"].collection
      for name, ix in collection.indexers.items():
          click.echo(f"{name}: {type(ix)}\n{ix.model_json()}")
linkml_store/index/__init__.py CHANGED
@@ -22,7 +22,7 @@ def get_indexer_class(name: str) -> Type[Indexer]:
      return INDEXER_CLASSES[name]
  
  
- def get_indexer(name: str, *args, **kwargs) -> Indexer:
+ def get_indexer(name: str, **kwargs) -> Indexer:
      """
      Get an indexer by name.
  
@@ -30,4 +30,8 @@ def get_indexer(name: str, *args, **kwargs) -> Indexer:
      :param kwargs: additional arguments to pass to the indexer
      :return: the indexer
      """
-     return get_indexer_class(name)(*args, **kwargs)
+     kwargs = {k: v for k, v in kwargs.items() if v is not None}
+     cls = get_indexer_class(name)
+     kwargs["name"] = name
+     indexer = cls(**kwargs)
+     return indexer
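Because None-valued kwargs are stripped before construction, CLI options that were not supplied simply fall back to the indexer class's own defaults. A small sketch; the option values are illustrative:

    from linkml_store.index import get_indexer

    # --text-template was not supplied, so it arrives as None and is dropped;
    # the indexer is built with name="llm" plus the remaining keyword arguments
    ix = get_indexer("llm", text_template=None, cached_embeddings_database="embeddings.db")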