linkml-store 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl
This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.
Potentially problematic release: this version of linkml-store has been flagged as possibly problematic.
- linkml_store/api/client.py +30 -5
- linkml_store/api/collection.py +175 -21
- linkml_store/api/config.py +6 -2
- linkml_store/api/database.py +230 -18
- linkml_store/api/stores/chromadb/__init__.py +5 -1
- linkml_store/api/stores/duckdb/__init__.py +9 -0
- linkml_store/api/stores/duckdb/duckdb_collection.py +6 -4
- linkml_store/api/stores/duckdb/duckdb_database.py +19 -5
- linkml_store/api/stores/duckdb/mappings.py +1 -0
- linkml_store/api/stores/filesystem/__init__.py +16 -0
- linkml_store/api/stores/filesystem/filesystem_collection.py +142 -0
- linkml_store/api/stores/filesystem/filesystem_database.py +36 -0
- linkml_store/api/stores/hdf5/__init__.py +7 -0
- linkml_store/api/stores/mongodb/__init__.py +25 -0
- linkml_store/api/stores/mongodb/mongodb_collection.py +21 -6
- linkml_store/cli.py +64 -10
- linkml_store/index/__init__.py +6 -2
- linkml_store/index/implementations/llm_indexer.py +83 -5
- linkml_store/index/implementations/simple_indexer.py +2 -2
- linkml_store/index/indexer.py +32 -8
- linkml_store/utils/format_utils.py +52 -2
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.8.dist-info}/METADATA +4 -1
- linkml_store-0.1.8.dist-info/RECORD +45 -0
- linkml_store-0.1.7.dist-info/RECORD +0 -42
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.8.dist-info}/LICENSE +0 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.8.dist-info}/WHEEL +0 -0
- {linkml_store-0.1.7.dist-info → linkml_store-0.1.8.dist-info}/entry_points.txt +0 -0
linkml_store/api/stores/filesystem/filesystem_collection.py
ADDED

```diff
@@ -0,0 +1,142 @@
+import logging
+from typing import Any, Dict, List, Optional, Union
+
+import sqlalchemy as sqla
+from linkml_runtime.linkml_model import ClassDefinition, SlotDefinition
+from sqlalchemy import Column, Table, delete, insert, inspect, text
+from sqlalchemy.sql.ddl import CreateTable
+
+from linkml_store.api import Collection
+from linkml_store.api.collection import DEFAULT_FACET_LIMIT, OBJECT
+from linkml_store.api.queries import Query
+from linkml_store.api.stores.duckdb.mappings import TMAP
+from linkml_store.utils.sql_utils import facet_count_sql
+
+logger = logging.getLogger(__name__)
+
+
+class FileSystemCollection(Collection):
+    _table_created: bool = None
+
+    def insert(self, objs: Union[OBJECT, List[OBJECT]], **kwargs):
+        if not isinstance(objs, list):
+            objs = [objs]
+        if not objs:
+            return
+        cd = self.class_definition()
+        if not cd:
+            cd = self.induce_class_definition_from_objects(objs)
+        self._create_table(cd)
+        table = self._sqla_table(cd)
+        logger.info(f"Inserting into: {self.alias} // T={table.name}")
+        engine = self.parent.engine
+        col_names = [c.name for c in table.columns]
+        objs = [{k: obj.get(k, None) for k in col_names} for obj in objs]
+        with engine.connect() as conn:
+            with conn.begin():
+                conn.execute(insert(table), objs)
+            conn.commit()
+
+    def delete(self, objs: Union[OBJECT, List[OBJECT]], **kwargs) -> Optional[int]:
+        if not isinstance(objs, list):
+            objs = [objs]
+        cd = self.class_definition()
+        if not cd:
+            cd = self.induce_class_definition_from_objects(objs)
+        table = self._sqla_table(cd)
+        engine = self.parent.engine
+        with engine.connect() as conn:
+            for obj in objs:
+                conditions = [table.c[k] == v for k, v in obj.items() if k in cd.attributes]
+                stmt = delete(table).where(*conditions)
+                stmt = stmt.compile(engine)
+                conn.execute(stmt)
+            conn.commit()
+        return
+
+    def delete_where(self, where: Optional[Dict[str, Any]] = None, missing_ok=True, **kwargs) -> Optional[int]:
+        logger.info(f"Deleting from {self.target_class_name} where: {where}")
+        if where is None:
+            where = {}
+        cd = self.class_definition()
+        if not cd:
+            logger.info(f"No class definition found for {self.target_class_name}, assuming not prepopulated")
+            return 0
+        table = self._sqla_table(cd)
+        engine = self.parent.engine
+        inspector = inspect(engine)
+        table_exists = table.name in inspector.get_table_names()
+        if not table_exists:
+            logger.info(f"Table {table.name} does not exist, assuming no data")
+            return 0
+        with engine.connect() as conn:
+            conditions = [table.c[k] == v for k, v in where.items()]
+            stmt = delete(table).where(*conditions)
+            stmt = stmt.compile(engine)
+            result = conn.execute(stmt)
+            deleted_rows_count = result.rowcount
+            if deleted_rows_count == 0 and not missing_ok:
+                raise ValueError(f"No rows found for {where}")
+            conn.commit()
+        return deleted_rows_count if deleted_rows_count > -1 else None
+
+    def query_facets(
+        self, where: Dict = None, facet_columns: List[str] = None, facet_limit=DEFAULT_FACET_LIMIT, **kwargs
+    ) -> Dict[str, Dict[str, int]]:
+        results = {}
+        cd = self.class_definition()
+        with self.parent.engine.connect() as conn:
+            if not facet_columns:
+                facet_columns = list(self.class_definition().attributes.keys())
+            for col in facet_columns:
+                logger.debug(f"Faceting on {col}")
+                if isinstance(col, tuple):
+                    sd = SlotDefinition(name="PLACEHOLDER")
+                else:
+                    sd = cd.attributes[col]
+                facet_query = self._create_query(where_clause=where)
+                facet_query_str = facet_count_sql(facet_query, col, multivalued=sd.multivalued)
+                logger.debug(f"Facet query: {facet_query_str}")
+                rows = list(conn.execute(text(facet_query_str)))
+                results[col] = rows
+        return results
+
+    def _sqla_table(self, cd: ClassDefinition) -> Table:
+        schema_view = self.parent.schema_view
+        metadata_obj = sqla.MetaData()
+        cols = []
+        for att in schema_view.class_induced_slots(cd.name):
+            typ = TMAP.get(att.range, sqla.String)
+            if att.inlined:
+                typ = sqla.JSON
+            if att.multivalued:
+                typ = sqla.ARRAY(typ, dimensions=1)
+            if att.array:
+                typ = sqla.ARRAY(typ, dimensions=1)
+            col = Column(att.name, typ)
+            cols.append(col)
+        t = Table(self.alias, metadata_obj, *cols)
+        return t
+
+    def _create_table(self, cd: ClassDefinition):
+        if self._table_created or self.metadata.is_prepopulated:
+            logger.info(f"Already have table for: {cd.name}")
+            return
+        query = Query(
+            from_table="information_schema.tables", where_clause={"table_type": "BASE TABLE", "table_name": self.alias}
+        )
+        qr = self.parent.query(query)
+        if qr.num_rows > 0:
+            logger.info(f"Table already exists for {cd.name}")
+            self._table_created = True
+            self.metadata.is_prepopulated = True
+            return
+        logger.info(f"Creating table for {cd.name}")
+        t = self._sqla_table(cd)
+        ct = CreateTable(t)
+        ddl = str(ct.compile(self.parent.engine))
+        with self.parent.engine.connect() as conn:
+            conn.execute(text(ddl))
+            conn.commit()
+        self._table_created = True
+        self.metadata.is_prepopulated = True
```
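The insert path above normalizes each incoming object to the table's column set before a single bulk insert, so extra keys are dropped and missing keys become NULL. A minimal standalone sketch of the same pattern using plain SQLAlchemy, with an in-memory SQLite engine and a hypothetical `person` table standing in for the induced LinkML class:

```python
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine, insert

engine = create_engine("sqlite:///:memory:")
metadata = MetaData()
# Hypothetical two-column table standing in for the induced class definition.
person = Table("person", metadata, Column("id", Integer), Column("name", String))
metadata.create_all(engine)

objs = [{"id": 1, "name": "a", "extra": "dropped"}, {"id": 2}]
col_names = [c.name for c in person.columns]
# Same normalization as FileSystemCollection.insert: restrict each object to
# the known columns, defaulting absent keys to None.
rows = [{k: obj.get(k, None) for k in col_names} for obj in objs]

with engine.connect() as conn:
    with conn.begin():
        # executemany-style bulk insert of the normalized rows
        conn.execute(insert(person), rows)
```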
linkml_store/api/stores/filesystem/filesystem_database.py
ADDED

```diff
@@ -0,0 +1,36 @@
+import logging
+from typing import Optional
+
+from linkml_store.api import Collection, Database
+from linkml_store.api.config import CollectionConfig
+from linkml_store.api.stores.duckdb import DuckDBDatabase
+from linkml_store.api.stores.filesystem.filesystem_collection import FileSystemCollection
+
+logger = logging.getLogger(__name__)
+
+
+class FileSystemDatabase(Database):
+    collection_class = FileSystemCollection
+    wrapped_database: Database = None
+
+    def __init__(self, handle: Optional[str] = None, recreate_if_exists: bool = False, **kwargs):
+        self.wrapped_database = DuckDBDatabase("duckdb:///:memory:")
+        super().__init__(handle=handle, **kwargs)
+
+    def commit(self, **kwargs):
+        # TODO: sync
+        pass
+
+    def close(self, **kwargs):
+        self.wrapped_database.close()
+
+    def create_collection(
+        self,
+        name: str,
+        alias: Optional[str] = None,
+        metadata: Optional[CollectionConfig] = None,
+        recreate_if_exists=False,
+        **kwargs,
+    ) -> Collection:
+        wd = self.wrapped_database
+        wd.create_collection()
```
linkml_store/api/stores/mongodb/__init__.py
ADDED

```diff
@@ -0,0 +1,25 @@
+"""
+Adapter for MongoDB document store.
+
+Handles have the form: ``mongodb://<host>:<port>/<database>``
+
+To use this, you must have the `pymongo` extra installed.
+
+.. code-block:: bash
+
+    pip install linkml-store[mongodb]
+
+or
+
+.. code-block:: bash
+
+    pip install linkml-store[all]
+"""
+
+from linkml_store.api.stores.mongodb.mongodb_collection import MongoDBCollection
+from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
+
+__all__ = [
+    "MongoDBCollection",
+    "MongoDBDatabase",
+]
```
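A hedged usage sketch of the handle form documented above, going through the generic client API rather than this module directly. It assumes a mongod reachable on localhost:27017; the database, collection, and object names are hypothetical:

```python
from linkml_store import Client

client = Client()
# Handle form: mongodb://<host>:<port>/<database>
db = client.get_database("mongodb://localhost:27017/demo")
coll = db.create_collection("Person")
coll.insert({"name": "Alice", "aliases": ["A", "Al"]})
result = coll.find({"name": "Alice"})
print(result.num_rows)
```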
linkml_store/api/stores/mongodb/mongodb_collection.py
CHANGED

```diff
@@ -13,6 +13,14 @@ logger = logging.getLogger(__name__)
 
 
 class MongoDBCollection(Collection):
+    """
+    Adapter for collections in a MongoDB database.
+
+    .. note::
+
+        You should not use or manipulate this class directly.
+        Instead, use the general :class:`linkml_store.api.Collection`
+    """
 
     @property
     def mongo_collection(self) -> MongoCollection:
@@ -62,24 +70,31 @@ class MongoDBCollection(Collection):
             if isinstance(col, tuple):
                 sd = SlotDefinition(name="PLACEHOLDER")
             else:
-
-
-
+                if col in cd.attributes:
+                    sd = cd.attributes[col]
+                else:
+                    logger.info(f"No schema metadata for {col}")
+                    sd = SlotDefinition(name=col)
+            group = {"$group": {"_id": f"${col}", "count": {"$sum": 1}}}
+            if isinstance(col, tuple):
+                q = {k.replace(".", ""): f"${k}" for k in col}
+                group["$group"]["_id"] = q
+            if sd and sd.multivalued:
                 facet_pipeline = [
                     {"$match": where} if where else {"$match": {}},
                     {"$unwind": f"${col}"},
-
+                    group,
                     {"$sort": {"count": -1}},
                     {"$limit": facet_limit},
                 ]
             else:
                 facet_pipeline = [
                     {"$match": where} if where else {"$match": {}},
-
+                    group,
                     {"$sort": {"count": -1}},
                     {"$limit": facet_limit},
                 ]
-
+            logger.info(f"Facet pipeline: {facet_pipeline}")
             facet_results = list(self.mongo_collection.aggregate(facet_pipeline))
             results[col] = [(result["_id"], result["count"]) for result in facet_results]
 
```
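The new faceting logic builds one `$group` stage per facet column; for a tuple-valued column the group `_id` becomes a document keyed by each member path. A self-contained sketch of the pipeline shape (values illustrative, no server needed):

```python
def build_group(col):
    # Scalar column: group on the "$col" path.
    group = {"$group": {"_id": f"${col}", "count": {"$sum": 1}}}
    if isinstance(col, tuple):
        # Compound facet: one key per member path, dots stripped from key names.
        group["$group"]["_id"] = {k.replace(".", ""): f"${k}" for k in col}
    return group

where = {"status": "active"}
facet_limit = 20
pipeline = [
    {"$match": where} if where else {"$match": {}},
    build_group(("country", "city")),
    {"$sort": {"count": -1}},
    {"$limit": facet_limit},
]
print(pipeline)
```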
linkml_store/cli.py
CHANGED

```diff
@@ -11,12 +11,19 @@ from pydantic import BaseModel
 from linkml_store import Client
 from linkml_store.api import Collection, Database
 from linkml_store.api.queries import Query
+from linkml_store.index import get_indexer
 from linkml_store.index.implementations.simple_indexer import SimpleIndexer
 from linkml_store.index.indexer import Indexer
-from linkml_store.utils.format_utils import Format, load_objects, render_output
+from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output
 from linkml_store.utils.object_utils import object_path_update
 
-index_type_option = click.option(
+index_type_option = click.option(
+    "--index-type",
+    "-t",
+    default="simple",
+    show_default=True,
+    help="Type of index to create. Values: simple, llm",
+)
 
 logger = logging.getLogger(__name__)
 
@@ -70,6 +77,9 @@ class ContextSettings(BaseModel):
 format_choice = click.Choice([f.value for f in Format])
 
 
+include_internal_option = click.option("--include-internal/--no-include-internal", default=False, show_default=True)
+
+
 @click.group()
 @click.option("--database", "-d", help="Database name")
 @click.option("--collection", "-c", help="Collection name")
@@ -89,6 +99,15 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     if not stacktrace:
         sys.tracebacklimit = 0
     logger = logging.getLogger()
+    # Set handler for the root logger to output to the console
+    console_handler = logging.StreamHandler()
+    console_handler.setFormatter(logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s"))
+
+    # Clear existing handlers to avoid duplicate messages if function runs multiple times
+    logger.handlers = []
+
+    # Add the newly created console handler to the logger
+    logger.addHandler(console_handler)
     if verbose >= 2:
         logger.setLevel(logging.DEBUG)
     elif verbose == 1:
@@ -193,6 +212,35 @@ def store(ctx, files, object, format):
     click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{db.name}'.")
 
 
+@cli.command(name="import")
+@click.argument("files", type=click.Path(exists=True), nargs=-1)
+@click.option("--format", "-f", help="Input format")
+@click.pass_context
+def import_database(ctx, files, format):
+    """Imports a database from a dump."""
+    settings = ctx.obj["settings"]
+    db = settings.database
+    if not files and not object:
+        files = ["-"]
+    for file_path in files:
+        db.import_database(file_path, source_format=format)
+
+
+@cli.command()
+@click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+@click.option("--output", "-o", required=True, type=click.Path(), help="Output file path")
+@click.pass_context
+def export(ctx, output_type, output):
+    """Exports a database to a dump."""
+    settings = ctx.obj["settings"]
+    db = settings.database
+    if output_type is None:
+        output_type = guess_format(output)
+    if output_type is None:
+        raise ValueError(f"Output format must be specified can't be inferred from {output}.")
+    db.export_database(output, target_format=output_type)
+
+
 @cli.command()
 @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
 @click.option("--limit", "-l", type=click.INT, help="Maximum number of results to return")
@@ -216,9 +264,10 @@ def query(ctx, where, limit, output_type, output):
 
 @cli.command()
 @click.pass_context
-
+@include_internal_option
+def list_collections(ctx, **kwargs):
     db = ctx.obj["settings"].database
-    for collection in db.list_collections():
+    for collection in db.list_collections(**kwargs):
         click.echo(collection.name)
         click.echo(render_output(collection.metadata))
 
@@ -254,7 +303,7 @@ def fq(ctx, where, limit, columns, output_type, output):
 
     def _untuple(key):
         if isinstance(key, tuple):
-            return "+".join(key)
+            return "+".join([str(x) for x in key])
        return key
 
     count_dict = {}
@@ -279,8 +328,10 @@ def _get_index(index_type=None, **kwargs) -> Indexer:
 
 @cli.command()
 @index_type_option
+@click.option("--cached-embeddings-database", "-E", help="Path to the database where embeddings are cached")
+@click.option("--text-template", "-T", help="Template for text embeddings")
 @click.pass_context
-def index(ctx, index_type):
+def index(ctx, index_type, **kwargs):
     """
     Create an index over a collection.
 
@@ -289,7 +340,7 @@ def index(ctx, index_type):
     :return:
     """
     collection = ctx.obj["settings"].collection
-    ix =
+    ix = get_indexer(index_type, **kwargs)
     collection.attach_indexer(ix)
 
 
@@ -322,14 +373,17 @@ def schema(ctx, output_type, output):
 @click.option("--limit", "-l", type=click.INT, help="Maximum number of search results")
 @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
+@click.option(
+    "--auto-index/--no-auto-index", default=False, show_default=True, help="Automatically index the collection"
+)
 @index_type_option
 @click.pass_context
-def search(ctx, search_term, where, limit, index_type, output_type, output):
+def search(ctx, search_term, where, limit, index_type, output_type, output, auto_index):
     """Search objects in the specified collection."""
     collection = ctx.obj["settings"].collection
-    ix =
+    ix = get_indexer(index_type)
     logger.info(f"Attaching index to collection {collection.name}: {ix.model_dump()}")
-    collection.attach_indexer(ix, auto_index=
+    collection.attach_indexer(ix, auto_index=auto_index)
     result = collection.search(search_term, where=where, limit=limit)
     output_data = render_output([{"score": row[0], **row[1]} for row in result.ranked_rows], output_type)
     if output:
```
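A hedged sketch of driving the new `import` and `export` commands programmatically through click's test runner. The DuckDB handle and file names are hypothetical, and passing a handle via `-d` is an assumption based on the group's `--database` option:

```python
from click.testing import CliRunner

from linkml_store.cli import cli

runner = CliRunner()
# Load a dump into a DuckDB-backed database (hypothetical paths).
runner.invoke(cli, ["-d", "duckdb:///tmp/demo.db", "import", "dump.json"])
# Export it again; -O names the dump format explicitly.
result = runner.invoke(cli, ["-d", "duckdb:///tmp/demo.db", "export", "-o", "dump.yaml", "-O", "yaml"])
print(result.output)
```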
linkml_store/index/__init__.py
CHANGED

```diff
@@ -22,7 +22,7 @@ def get_indexer_class(name: str) -> Type[Indexer]:
     return INDEXER_CLASSES[name]
 
 
-def get_indexer(name: str,
+def get_indexer(name: str, **kwargs) -> Indexer:
     """
     Get an indexer by name.
 
@@ -30,4 +30,8 @@ def get_indexer(name: str, *args, **kwargs) -> Indexer:
     :param kwargs: additional arguments to pass to the indexer
     :return: the indexer
     """
-
+    kwargs = {k: v for k, v in kwargs.items() if v is not None}
+    cls = get_indexer_class(name)
+    kwargs["name"] = name
+    indexer = cls(**kwargs)
+    return indexer
```
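A short sketch of the factory behavior added above: `None`-valued keyword arguments are stripped before instantiation, so unset CLI options do not clobber indexer defaults, and the registered name is stored on the instance ("simple" and "llm" are the names referenced in the CLI help):

```python
from linkml_store.index import get_indexer

# text_template=None is filtered out, leaving the indexer's own default intact.
ix = get_indexer("simple", text_template=None)
print(type(ix).__name__)  # SimpleIndexer
print(ix.name)            # "simple", injected by the factory
```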
linkml_store/index/implementations/llm_indexer.py
CHANGED

```diff
@@ -1,20 +1,34 @@
+import logging
+from pathlib import Path
 from typing import TYPE_CHECKING, List
 
 import numpy as np
 
+from linkml_store.api.config import CollectionConfig
 from linkml_store.index.indexer import INDEX_ITEM, Indexer
 
 if TYPE_CHECKING:
     import llm
 
 
+logger = logging.getLogger(__name__)
+
+
 class LLMIndexer(Indexer):
     """
-
+    An indexer that wraps the llm library.
+
+    This indexer is used to convert text to vectors using the llm library.
+
+    >>> indexer = LLMIndexer(cached_embeddings_database="tests/input/llm_cache.db")
+    >>> vector = indexer.text_to_vector("hello")
     """
 
     embedding_model_name: str = "ada-002"
     _embedding_model: "llm.EmbeddingModel" = None
+    cached_embeddings_database: str = None
+    cached_embeddings_collection: str = None
+    cache_queries: bool = False
 
     @property
     def embedding_model(self):
@@ -24,21 +38,85 @@ class LLMIndexer(Indexer):
         self._embedding_model = llm.get_embedding_model(self.embedding_model_name)
         return self._embedding_model
 
-    def text_to_vector(self, text: str) -> INDEX_ITEM:
+    def text_to_vector(self, text: str, cache: bool = None, **kwargs) -> INDEX_ITEM:
         """
         Convert a text to an indexable object
 
+        >>> indexer = LLMIndexer(cached_embeddings_database="tests/input/llm_cache.db")
+        >>> vector = indexer.text_to_vector("hello")
+
         :param text:
         :return:
         """
-        return self.texts_to_vectors([text])[0]
+        return self.texts_to_vectors([text], cache=cache, **kwargs)[0]
 
-    def texts_to_vectors(self, texts: List[str]) -> List[INDEX_ITEM]:
+    def texts_to_vectors(self, texts: List[str], cache: bool = None, **kwargs) -> List[INDEX_ITEM]:
         """
         Use LLM to embed
 
+        >>> indexer = LLMIndexer(cached_embeddings_database="tests/input/llm_cache.db")
+        >>> vectors = indexer.texts_to_vectors(["hello", "goodbye"])
+
         :param texts:
         :return:
         """
-
+        logging.info(f"Converting {len(texts)} texts to vectors")
+        model = self.embedding_model
+        if self.cached_embeddings_database and (cache is None or cache or self.cache_queries):
+            model_id = model.model_id
+            if not model_id:
+                raise ValueError("Model ID is required to cache embeddings")
+            db_path = Path(self.cached_embeddings_database)
+            coll_name = self.cached_embeddings_collection
+            if not coll_name:
+                coll_name = "all_embeddings"
+            from linkml_store import Client
+
+            embeddings_client = Client()
+            config = CollectionConfig(
+                name=coll_name,
+                type="Embeddings",
+                attributes={
+                    "text": {"range": "string"},
+                    "model_id": {"range": "string"},
+                    "embedding": {"range": "float", "array": {}},
+                },
+            )
+            embeddings_db = embeddings_client.get_database(f"duckdb:///{db_path}")
+            if coll_name in embeddings_db.list_collection_names():
+                # Load existing collection and use its model
+                embeddings_collection = embeddings_db.create_collection(coll_name, metadata=config)
+            else:
+                embeddings_collection = embeddings_db.create_collection(coll_name, metadata=config)
+            texts = list(texts)
+            embeddings = list([None] * len(texts))
+            uncached_texts = []
+            n = 0
+            for i in range(len(texts)):
+                # TODO: optimize this
+                text = texts[i]
+                logger.info(f"Looking for cached embedding for {text}")
+                r = embeddings_collection.find({"text": text, "model_id": model_id})
+                if r.num_rows:
+                    embeddings[i] = r.rows[0]["embedding"]
+                    n += 1
+                    logger.info("Found")
+                else:
+                    uncached_texts.append((text, i))
+                    logger.info("NOT Found")
+            logger.info(f"Found {n} cached embeddings")
+            if uncached_texts:
+                logger.info(f"Embedding {len(uncached_texts)} uncached texts")
+                uncached_texts, uncached_indices = zip(*uncached_texts)
+                uncached_embeddings = list(model.embed_multi(uncached_texts))
+                # TODO: combine into a single insert with multiple rows
+                for i, index in enumerate(uncached_indices):
+                    logger.debug(f"Indexing text at {i}")
+                    embeddings[index] = uncached_embeddings[i]
+                    embeddings_collection.insert(
+                        {"text": uncached_texts[i], "embedding": embeddings[index], "model_id": model_id}
+                    )
+        else:
+            logger.info(f"Embedding {len(texts)} texts")
+            embeddings = model.embed_multi(texts)
         return [np.array(v, dtype=float) for v in embeddings]
```
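The cache bookkeeping above preserves input order: hits fill their original slots, misses are batch-embedded once, then written back by saved index. A pure-Python sketch of that pattern, with hypothetical `fake_cache`/`fake_embed` stand-ins for the collection lookup and `model.embed_multi`:

```python
fake_cache = {"hello": [0.1, 0.2]}  # stand-in for the cached-embeddings collection

def fake_embed(batch):
    # stand-in for model.embed_multi(batch)
    return [[0.0, 0.0] for _ in batch]

texts = ["hello", "goodbye"]
embeddings = [None] * len(texts)
uncached = []
for i, text in enumerate(texts):
    if text in fake_cache:
        embeddings[i] = fake_cache[text]  # cache hit keeps its position
    else:
        uncached.append((text, i))        # remember the original index
if uncached:
    batch, indices = zip(*uncached)
    for vec, idx in zip(fake_embed(batch), indices):
        embeddings[idx] = vec             # write misses back by saved index
print(embeddings)
```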
linkml_store/index/implementations/simple_indexer.py
CHANGED

```diff
@@ -15,7 +15,7 @@ class SimpleIndexer(Indexer):
     This uses a naive method to generate an index from text. It is not suitable for production use.
     """
 
-    def text_to_vector(self, text: str) -> INDEX_ITEM:
+    def text_to_vector(self, text: str, cache: bool = None, **kwargs) -> INDEX_ITEM:
         """
         This is a naive method purely for testing
 
@@ -39,5 +39,5 @@ class SimpleIndexer(Indexer):
 
         # Increment the count at the computed index
         vector[index] += 1.0
-        logger.
+        logger.debug(f"Indexed text: {text} as {vector}")
         return vector
```
linkml_store/index/indexer.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from enum import Enum
|
|
1
3
|
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
2
4
|
|
|
3
5
|
import numpy as np
|
|
@@ -5,6 +7,13 @@ from pydantic import BaseModel
|
|
|
5
7
|
|
|
6
8
|
INDEX_ITEM = np.ndarray
|
|
7
9
|
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class TemplateSyntaxEnum(str, Enum):
|
|
14
|
+
jinja2 = "jinja2"
|
|
15
|
+
fstring = "fstring"
|
|
16
|
+
|
|
8
17
|
|
|
9
18
|
def cosine_similarity(vector1, vector2):
|
|
10
19
|
dot_product = np.dot(vector1, vector2)
|
|
@@ -21,8 +30,9 @@ class Indexer(BaseModel):
|
|
|
21
30
|
name: Optional[str] = None
|
|
22
31
|
index_function: Optional[Callable] = None
|
|
23
32
|
distance_function: Optional[Callable] = None
|
|
24
|
-
index_attributes: Optional[str] = None
|
|
33
|
+
index_attributes: Optional[List[str]] = None
|
|
25
34
|
text_template: Optional[str] = None
|
|
35
|
+
text_template_syntax: Optional[TemplateSyntaxEnum] = None
|
|
26
36
|
filter_nulls: Optional[bool] = True
|
|
27
37
|
vector_default_length: Optional[int] = 1000
|
|
28
38
|
index_field: Optional[str] = "__index__"
|
|
@@ -41,24 +51,25 @@ class Indexer(BaseModel):
|
|
|
41
51
|
Convert a list of objects to indexable objects
|
|
42
52
|
|
|
43
53
|
:param objs:
|
|
44
|
-
:return:
|
|
54
|
+
:return: list of vectors
|
|
45
55
|
"""
|
|
46
|
-
return [self.
|
|
56
|
+
return self.texts_to_vectors([self.object_to_text(obj) for obj in objs])
|
|
47
57
|
|
|
48
|
-
def texts_to_vectors(self, texts: List[str]) -> List[INDEX_ITEM]:
|
|
58
|
+
def texts_to_vectors(self, texts: List[str], cache: bool = None, **kwargs) -> List[INDEX_ITEM]:
|
|
49
59
|
"""
|
|
50
60
|
Convert a list of texts to indexable objects
|
|
51
61
|
|
|
52
62
|
:param texts:
|
|
53
63
|
:return:
|
|
54
64
|
"""
|
|
55
|
-
return [self.text_to_vector(text) for text in texts]
|
|
65
|
+
return [self.text_to_vector(text, cache=cache, **kwargs) for text in texts]
|
|
56
66
|
|
|
57
|
-
def text_to_vector(self, text: str) -> INDEX_ITEM:
|
|
67
|
+
def text_to_vector(self, text: str, cache: bool = None, **kwargs) -> INDEX_ITEM:
|
|
58
68
|
"""
|
|
59
69
|
Convert a text to an indexable object
|
|
60
70
|
|
|
61
71
|
:param text:
|
|
72
|
+
:param cache:
|
|
62
73
|
:return:
|
|
63
74
|
"""
|
|
64
75
|
raise NotImplementedError
|
|
@@ -71,11 +82,24 @@ class Indexer(BaseModel):
|
|
|
71
82
|
:return:
|
|
72
83
|
"""
|
|
73
84
|
if self.index_attributes:
|
|
85
|
+
if len(self.index_attributes) == 1 and not self.text_template:
|
|
86
|
+
return str(obj[self.index_attributes[0]])
|
|
74
87
|
obj = {k: v for k, v in obj.items() if k in self.index_attributes}
|
|
75
88
|
if self.filter_nulls:
|
|
76
89
|
obj = {k: v for k, v in obj.items() if v is not None}
|
|
77
90
|
if self.text_template:
|
|
78
|
-
|
|
91
|
+
syntax = self.text_template_syntax
|
|
92
|
+
if not syntax:
|
|
93
|
+
if "{%" in self.text_template or "{{" in self.text_template:
|
|
94
|
+
logger.info("Detected Jinja2 syntax in text template")
|
|
95
|
+
syntax = TemplateSyntaxEnum.jinja2
|
|
96
|
+
if syntax and syntax == TemplateSyntaxEnum.jinja2:
|
|
97
|
+
from jinja2 import Template
|
|
98
|
+
|
|
99
|
+
template = Template(self.text_template)
|
|
100
|
+
return template.render(**obj)
|
|
101
|
+
else:
|
|
102
|
+
return self.text_template.format(**obj)
|
|
79
103
|
return str(obj)
|
|
80
104
|
|
|
81
105
|
def search(
|
|
@@ -91,7 +115,7 @@ class Indexer(BaseModel):
|
|
|
91
115
|
"""
|
|
92
116
|
|
|
93
117
|
# Convert the query string to a vector
|
|
94
|
-
query_vector = self.text_to_vector(query)
|
|
118
|
+
query_vector = self.text_to_vector(query, cache=False)
|
|
95
119
|
|
|
96
120
|
distances = []
|
|
97
121
|
|