linkml-store 0.2.1__py3-none-any.whl → 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -1,3 +1,4 @@
+ import importlib
  import logging
  from pathlib import Path
  from typing import Dict, Optional, Union
@@ -7,23 +8,18 @@ from linkml_runtime import SchemaView

  from linkml_store.api import Database
  from linkml_store.api.config import ClientConfig
- from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
- from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
- from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
- from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
- from linkml_store.api.stores.neo4j.neo4j_database import Neo4jDatabase
- from linkml_store.api.stores.solr.solr_database import SolrDatabase

  logger = logging.getLogger(__name__)


+
  HANDLE_MAP = {
- "duckdb": DuckDBDatabase,
- "solr": SolrDatabase,
- "mongodb": MongoDBDatabase,
- "chromadb": ChromaDBDatabase,
- "neo4j": Neo4jDatabase,
- "file": FileSystemDatabase,
+ "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
+ "solr": "linkml_store.api.stores.solr.solr_database.SolrDatabase",
+ "mongodb": "linkml_store.api.stores.mongodb.mongodb_database.MongoDBDatabase",
+ "chromadb": "linkml_store.api.stores.chromadb.chromadb_database.ChromaDBDatabase",
+ "neo4j": "linkml_store.api.stores.neo4j.neo4j_database.Neo4jDatabase",
+ "file": "linkml_store.api.stores.filesystem.filesystem_database.FileSystemDatabase",
  }


@@ -155,6 +151,9 @@ class Client:
  if auto_attach:
  db = self.attach_database(handle, alias=name, **kwargs)
  db.from_config(db_config)
+ if db_config.source:
+ db = self.get_database(name)
+ db.store(db_config.source.data)

  def _set_database_config(self, db: Database):
  """
@@ -207,7 +206,14 @@ class Client:
  scheme, _ = handle.split(":", 1)
  if scheme not in HANDLE_MAP:
  raise ValueError(f"Unknown scheme: {scheme}")
- cls = HANDLE_MAP[scheme]
+ module_path, class_name = HANDLE_MAP[scheme].rsplit('.', 1)
+ try:
+ module = importlib.import_module(module_path)
+ cls = getattr(module, class_name)
+ except ImportError as e:
+ raise ImportError(f"Failed to import {scheme} database. Make sure the correct extras are installed: {e}")
+
+ #cls = HANDLE_MAP[scheme]
  db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
  if schema_view:
  db.set_schema_view(schema_view)
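The two hunks above replace eager imports of every backend with dotted-path strings that are resolved on demand, so optional stores (chromadb, neo4j, solr, ...) are only imported when their scheme is actually requested. A minimal sketch of the same lazy-resolution pattern (the registry and function names here are illustrative, not part of the package API):

    import importlib

    # hypothetical registry in the style of HANDLE_MAP: scheme -> "module.path.ClassName"
    _REGISTRY = {
        "duckdb": "linkml_store.api.stores.duckdb.duckdb_database.DuckDBDatabase",
    }

    def resolve_backend(scheme: str):
        """Import the backend class only when its scheme is used."""
        module_path, class_name = _REGISTRY[scheme].rsplit(".", 1)
        try:
            module = importlib.import_module(module_path)
        except ImportError as e:
            # surfaces a hint that the matching extra is not installed
            raise ImportError(f"backend for {scheme!r} not installed: {e}") from e
        return getattr(module, class_name)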
@@ -470,6 +470,8 @@ class Collection(Generic[DatabaseType]):
  where: Optional[Any] = None,
  index_name: Optional[str] = None,
  limit: Optional[int] = None,
+ select_cols: Optional[List[str]] = None,
+ mmr_relevance_factor: Optional[float] = None,
  **kwargs,
  ) -> QueryResult:
  """
@@ -502,6 +504,7 @@ class Collection(Generic[DatabaseType]):
  :param where:
  :param index_name:
  :param limit:
+ :param select_cols:
  :param kwargs:
  :return:
  """
@@ -534,9 +537,14 @@ class Collection(Generic[DatabaseType]):
  index_col = ix.index_field
  # TODO: optimize this for large indexes
  vector_pairs = [(row, np.array(row[index_col], dtype=float)) for row in qr.rows]
- results = ix.search(query, vector_pairs, limit=limit)
+ results = ix.search(query, vector_pairs, limit=limit, mmr_relevance_factor=mmr_relevance_factor, **kwargs)
  for r in results:
  del r[1][index_col]
+ if select_cols:
+ new_results = []
+ for r in results:
+ new_results.append((r[0], {k: v for k, v in r[1].items() if k in select_cols}))
+ results = new_results
  new_qr = QueryResult(num_rows=len(results))
  new_qr.ranked_rows = results
  new_qr.rows = [r[1] for r in results]
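Collection.search gains two optional arguments: select_cols trims each returned row to the named keys, and mmr_relevance_factor is passed through to the indexer to enable MMR re-ranking. A hedged usage sketch, assuming a collection that already has an indexer attached:

    # hypothetical collection with an attached indexer
    result = collection.search(
        "semantic query text",
        limit=5,
        select_cols=["name", "description"],  # keep only these keys in each row
        mmr_relevance_factor=0.8,             # closer to 1.0 favors relevance; lower adds diversity
    )
    for score, row in result.ranked_rows:
        print(score, row)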
@@ -671,6 +679,7 @@ class Collection(Generic[DatabaseType]):
  """
  yield from self.find({}, limit=-1).rows

+ @property
  def rows(self) -> List[OBJECT]:
  """
  Return a list of objects in the collection.
@@ -91,7 +91,7 @@ class CollectionConfig(ConfiguredBaseModel):
  )
  source: Optional[CollectionSource] = Field(
  default=None,
- description="Metadata about the source",
+ description="Source for the collection",
  )
  derived_from: Optional[List[DerivationConfiguration]] = Field(
  default=None,
@@ -154,6 +154,10 @@ class DatabaseConfig(ConfiguredBaseModel):
  default=False,
  description="Whether to ensure referential integrity",
  )
+ source: Optional[CollectionSource] = Field(
+ default=None,
+ description="Source for the database",
+ )


  class ClientConfig(ConfiguredBaseModel):
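DatabaseConfig now carries the same optional source field as CollectionConfig, and the attach_database change earlier loads that source into the database as soon as it is attached. A sketch of how such a configuration might be assembled programmatically; the nested dict shape follows the pattern used in cli.py below, and the file path is purely illustrative:

    from linkml_store.api.config import ClientConfig

    # hypothetical config: a duckdb database whose "persons" collection is
    # populated from a local file when the database is attached
    config = ClientConfig(
        databases={
            "duckdb": {
                "collections": {
                    "persons": {"source": {"local_path": "data/persons.yaml"}},
                },
            },
        },
    )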
@@ -3,7 +3,7 @@ from pathlib import Path
  from typing import Optional

  import yaml
- from linkml.utils.schema_builder import SchemaBuilder
+ from linkml_runtime.utils.schema_builder import SchemaBuilder
  from linkml_runtime import SchemaView

  from linkml_store.api import Database
linkml_store/cli.py CHANGED
@@ -1,8 +1,9 @@
  import logging
  import sys
  import warnings
+ from collections import defaultdict
  from pathlib import Path
- from typing import Optional
+ from typing import Optional, Tuple, Any

  import click
  import yaml
@@ -134,12 +135,17 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
  logger.setLevel(logging.ERROR)
  ctx.ensure_object(dict)
  if input:
- stem = underscore(Path(input).stem)
- database = "duckdb"
- collection = stem
+ database = "duckdb" # default: store in duckdb
+ if input.startswith("http"):
+ parts = input.split("/")
+ collection = parts[-1]
+ collection = collection.split(".")[0]
+ else:
+ stem = underscore(Path(input).stem)
+ collection = stem
+ logger.info(f"Using input file: {input}, "
+ f"default storage is {database} and collection is {collection}")
  config = ClientConfig(databases={"duckdb": {"collections": {stem: {"source": {"local_path": input}}}}})
- # collection = Path(input).stem
- # database = f"file:{Path(input).parent}"
  if config is None and DEFAULT_LOCAL_CONF_PATH.exists():
  config = DEFAULT_LOCAL_CONF_PATH
  if config is None and DEFAULT_GLOBAL_CONF_PATH.exists():
@@ -177,10 +183,11 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,

  @cli.command()
  @click.argument("files", type=click.Path(exists=True), nargs=-1)
+ @click.option("--replace/--no-replace", default=False, show_default=True, help="Replace existing objects")
  @click.option("--format", "-f", type=format_choice, help="Input format")
  @click.option("--object", "-i", multiple=True, help="Input object as YAML")
  @click.pass_context
- def insert(ctx, files, object, format):
+ def insert(ctx, files, replace, object, format):
  """Insert objects from files (JSON, YAML, TSV) into the specified collection.

  Using a configuration:
@@ -194,7 +201,6 @@ def insert(ctx, files, object, format):
  collection = settings.collection
  if not collection:
  raise ValueError("Collection must be specified.")
- objects = []
  if not files and not object:
  files = ["-"]
  for file_path in files:
@@ -203,13 +209,19 @@ def insert(ctx, files, object, format):
  else:
  objects = load_objects(file_path)
  logger.info(f"Inserting {len(objects)} objects from {file_path} into collection '{collection.alias}'.")
- collection.insert(objects)
+ if replace:
+ collection.replace(objects)
+ else:
+ collection.insert(objects)
  click.echo(f"Inserted {len(objects)} objects from {file_path} into collection '{collection.alias}'.")
  if object:
  for object_str in object:
  logger.info(f"Parsing: {object_str}")
  objects = yaml.safe_load(object_str)
- collection.insert(objects)
+ if replace:
+ collection.replace(objects)
+ else:
+ collection.insert(objects)
  click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.alias}'.")
  collection.commit()

@@ -415,14 +427,6 @@ def list_collections(ctx, **kwargs):
  def fq(ctx, where, limit, columns, output_type, wide, output):
  """
  Query facets from the specified collection.
-
- :param ctx:
- :param where:
- :param limit:
- :param columns:
- :param output_type:
- :param output:
- :return:
  """
  collection = ctx.obj["settings"].collection
  where_clause = yaml.safe_load(where) if where else None
@@ -488,6 +492,41 @@ def describe(ctx, where, output_type, output, limit):
  write_output(df.describe(include="all").transpose(), output_type, target=output)


+ @cli.command()
+ @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
+ @click.option("--limit", "-l", type=click.INT, help="Maximum number of results to return")
+ @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
+ @click.option("--output", "-o", type=click.Path(), help="Output file path")
+ @click.option("--index", "-I", help="Attributes to index on in pivot")
+ @click.option("--columns", "-A", help="Attributes to use as columns in pivot")
+ @click.option("--values", "-V", help="Attributes to use as values in pivot")
+ @click.pass_context
+ def pivot(ctx, where, limit, index, columns, values, output_type, output):
+ collection = ctx.obj["settings"].collection
+ where_clause = yaml.safe_load(where) if where else None
+ column_atts = columns.split(",") if columns else None
+ value_atts = values.split(",") if values else None
+ index_atts = index.split(",") if index else None
+ results = collection.find(where_clause, limit=limit)
+ pivoted = defaultdict(dict)
+ for row in results.rows:
+ index_key = tuple([row.get(att) for att in index_atts])
+ column_key = tuple([row.get(att) for att in column_atts])
+ value_key = tuple([row.get(att) for att in value_atts])
+ pivoted[index_key][column_key] = value_key
+ pivoted_objs = []
+ def detuple(t: Tuple) -> Any:
+ if len(t) == 1:
+ return t[0]
+ return str(t)
+ for index_key, data in pivoted.items():
+ obj = {att: key for att, key in zip(index_atts, index_key)}
+ for column_key, value_key in data.items():
+ obj[detuple(column_key)] = detuple(value_key)
+ pivoted_objs.append(obj)
+ write_output(pivoted_objs, output_type, target=output)
+
+
  @cli.command()
  @click.option("--output-type", "-O", type=format_choice, default=Format.YAML.value, help="Output format")
  @click.option("--output", "-o", type=click.Path(), help="Output file path")
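The new pivot command groups rows by the --index attributes, spreads the --columns attributes out as keys, and fills in the --values attributes, emitting one object per index key. A toy sketch of the same reshaping on plain dicts (data invented for illustration):

    from collections import defaultdict

    rows = [
        {"species": "setosa", "metric": "sepal_length", "value": 5.1},
        {"species": "setosa", "metric": "sepal_width", "value": 3.5},
    ]
    pivoted = defaultdict(dict)
    for row in rows:
        pivoted[row["species"]][row["metric"]] = row["value"]
    print(dict(pivoted))
    # {'setosa': {'sepal_length': 5.1, 'sepal_width': 3.5}}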
@@ -506,10 +545,12 @@ def describe(ctx, where, output_type, output, limit):
  @click.option("--evaluation-count", "-n", type=click.INT, help="Number of examples to evaluate over")
  @click.option("--evaluation-match-function", help="Name of function to use for matching objects in eval")
  @click.option("--query", "-q", type=click.STRING, help="query term")
+ @click.option("--where", "-w", type=click.STRING, help="query term")
  @click.pass_context
  def infer(
  ctx,
  inference_config_file,
+ where,
  query,
  evaluation_count,
  evaluation_match_function,
@@ -551,6 +592,7 @@ def infer(
  linkml-store -i tests/input/iris.csv inference -t sklearn \
  -q '{"sepal_length": 5.1, "sepal_width": 3.5, "petal_length": 1.4, "petal_width": 0.2}'
  """
+ where_clause = yaml.safe_load(where) if where else None
  if query:
  query_obj = yaml.safe_load(query)
  else:
@@ -653,6 +695,7 @@ def schema(ctx, output_type, output):
  @cli.command()
  @click.argument("search_term")
  @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the search")
+ @click.option("--select", "-s", type=click.STRING, help="SELECT clause for the query, as YAML")
  @click.option("--limit", "-l", type=click.INT, help="Maximum number of search results")
  @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
  @click.option("--output", "-o", type=click.Path(), help="Output file path")
@@ -661,13 +704,14 @@ def schema(ctx, output_type, output):
  )
  @index_type_option
  @click.pass_context
- def search(ctx, search_term, where, limit, index_type, output_type, output, auto_index):
+ def search(ctx, search_term, where, select, limit, index_type, output_type, output, auto_index):
  """Search objects in the specified collection."""
  collection = ctx.obj["settings"].collection
  ix = get_indexer(index_type)
  logger.info(f"Attaching index to collection {collection.alias}: {ix.model_dump()}")
  collection.attach_indexer(ix, auto_index=auto_index)
- result = collection.search(search_term, where=where, limit=limit)
+ select_cols = yaml.safe_load(select) if select else None
+ result = collection.search(search_term, where=where, select_cols=select_cols, limit=limit)
  output_data = render_output([{"score": row[0], **row[1]} for row in result.ranked_rows], output_type)
  if output:
  with open(output, "w") as f:
@@ -3,7 +3,6 @@ from pathlib import Path
  from typing import TYPE_CHECKING, List, Optional

  import numpy as np
- from tiktoken import encoding_for_model

  from linkml_store.api.config import CollectionConfig
  from linkml_store.index.indexer import INDEX_ITEM, Indexer
@@ -55,7 +54,7 @@ class LLMIndexer(Indexer):

  def texts_to_vectors(self, texts: List[str], cache: bool = None, **kwargs) -> List[INDEX_ITEM]:
  """
- Use LLM to embed
+ Use LLM to embed.

  >>> indexer = LLMIndexer(cached_embeddings_database="tests/input/llm_cache.db")
  >>> vectors = indexer.texts_to_vectors(["hello", "goodbye"])
@@ -63,20 +62,24 @@ class LLMIndexer(Indexer):
  :param texts:
  :return:
  """
+ from tiktoken import encoding_for_model
  logging.info(f"Converting {len(texts)} texts to vectors")
  model = self.embedding_model
- token_limit = get_token_limit(model.model_id)
+ # TODO: make this more accurate
+ token_limit = get_token_limit(model.model_id) - 200
  encoding = encoding_for_model("gpt-4o")

  def truncate_text(text: str) -> str:
  # split into tokens every 1000 chars:
  parts = [text[i : i + 1000] for i in range(0, len(text), 1000)]
- return render_formatted_text(
+ truncated = render_formatted_text(
  lambda x: "".join(x),
  parts,
  encoding,
  token_limit,
  )
+ logger.debug(f"Truncated text from {len(text)} to {len(truncated)}")
+ return truncated

  texts = [truncate_text(text) for text in texts]

@@ -3,6 +3,7 @@ from enum import Enum
  from typing import Any, Callable, Dict, List, Optional, Tuple

  import numpy as np
+ from linkml_store.utils.vector_utils import pairwise_cosine_similarity, mmr_diversified_search
  from pydantic import BaseModel

  INDEX_ITEM = np.ndarray
@@ -19,20 +20,6 @@ class TemplateSyntaxEnum(str, Enum):
  fstring = "fstring"


- def cosine_similarity(vector1, vector2) -> float:
- """
- Calculate the cosine similarity between two vectors
-
- :param vector1:
- :param vector2:
- :return:
- """
- dot_product = np.dot(vector1, vector2)
- norm1 = np.linalg.norm(vector1)
- norm2 = np.linalg.norm(vector2)
- return dot_product / (norm1 * norm2)
-
-
  class Indexer(BaseModel):
  """
  An indexer operates on a collection in order to search for objects.
@@ -79,7 +66,7 @@ class Indexer(BaseModel):
  to get a sense of how they work.

  >>> vectors = indexer.objects_to_vectors([{"name": "Aardvark"}, {"name": "Aardwolf"}, {"name": "Zesty"}])
- >>> assert cosine_similarity(vectors[0], vectors[1]) > cosine_similarity(vectors[0], vectors[2])
+ >>> assert pairwise_cosine_similarity(vectors[0], vectors[1]) > pairwise_cosine_similarity(vectors[0], vectors[2])

  Note you should consult the documentation for the specific indexer you are using for more details on
  how text is converted to vectors.
@@ -167,7 +154,8 @@ class Indexer(BaseModel):
  return str(obj)

  def search(
- self, query: str, vectors: List[Tuple[str, INDEX_ITEM]], limit: Optional[int] = None
+ self, query: str, vectors: List[Tuple[str, INDEX_ITEM]], limit: Optional[int] = None,
+ mmr_relevance_factor: Optional[float] = None
  ) -> List[Tuple[float, Any]]:
  """
  Use the indexer to search against a database of vectors.
@@ -183,13 +171,29 @@
  # Convert the query string to a vector
  query_vector = self.text_to_vector(query, cache=False)

+ if mmr_relevance_factor is not None:
+ vlist = [v for _, v in vectors]
+ idlist = [id for id, _ in vectors]
+ sorted_indices = mmr_diversified_search(
+ query_vector, vlist,
+ relevance_factor=mmr_relevance_factor, top_n=limit)
+ results = []
+ # TODO: this is inefficient when limit is high
+ for i in range(limit):
+ if i >= len(sorted_indices):
+ break
+ pos = sorted_indices[i]
+ score = pairwise_cosine_similarity(query_vector, vlist[pos])
+ results.append((score, idlist[pos]))
+ return results
+
  distances = []

  # Iterate over each indexed item
  for item_id, item_vector in vectors:
  # Calculate the Euclidean distance between the query vector and the item vector
  # distance = 1-np.linalg.norm(query_vector - item_vector)
- distance = cosine_similarity(query_vector, item_vector)
+ distance = pairwise_cosine_similarity(query_vector, item_vector)
  distances.append((distance, item_id))

  # Sort the distances in ascending order
@@ -15,6 +15,10 @@ from linkml_store.utils.object_utils import select_nested

  logger = logging.getLogger(__name__)

+ MAX_ITERATIONS = 5
+ DEFAULT_NUM_EXAMPLES = 20
+ DEFAULT_MMR_RELEVANCE_FACTOR = 0.8
+
  SYSTEM_PROMPT = """
  You are a {llm_config.role}, your task is to inference the YAML
  object output given the YAML object input. I will provide you
@@ -32,6 +36,10 @@ class TrainedModel(BaseModel, extra="forbid"):
  config: Optional[InferenceConfig] = None


+ class RAGInference(Inference):
+ iterations: int = 0
+
+
  @dataclass
  class RAGInferenceEngine(InferenceEngine):
  """
@@ -103,7 +111,7 @@ class RAGInferenceEngine(InferenceEngine):
  def object_to_text(self, object: OBJECT) -> str:
  return yaml.dump(object)

- def derive(self, object: OBJECT) -> Optional[Inference]:
+ def derive(self, object: OBJECT, iteration=0, additional_prompt_texts: Optional[List[str]] = None) -> Optional[RAGInference]:
  import llm
  from tiktoken import encoding_for_model

@@ -113,15 +121,17 @@ class RAGInferenceEngine(InferenceEngine):
  model_name = self.config.llm_config.model_name
  feature_attributes = self.config.feature_attributes
  target_attributes = self.config.target_attributes
- num_examples = self.config.llm_config.number_of_few_shot_examples or 5
+ num_examples = self.config.llm_config.number_of_few_shot_examples or DEFAULT_NUM_EXAMPLES
  query_text = self.object_to_text(object)
+ mmr_relevance_factor = DEFAULT_MMR_RELEVANCE_FACTOR
  if not self.rag_collection:
  # TODO: zero-shot mode
  examples = []
  else:
  if not self.rag_collection.indexers:
  raise ValueError("RAG collection must have an indexer attached")
- rs = self.rag_collection.search(query_text, limit=num_examples, index_name="llm")
+ rs = self.rag_collection.search(query_text, limit=num_examples, index_name="llm",
+ mmr_relevance_factor=mmr_relevance_factor)
  examples = rs.rows
  if not examples:
  raise ValueError(f"No examples found for {query_text}; size = {self.rag_collection.size()}")
@@ -143,23 +153,43 @@ class RAGInferenceEngine(InferenceEngine):
  )
  prompt_clauses.append(prompt_clause)

- prompt_end = "---\nQuery:\n" f"## INPUT:\n{query_text}\n" "## OUTPUT:\n"
  system_prompt = SYSTEM_PROMPT.format(llm_config=self.config.llm_config)
+ system_prompt += "\n".join(additional_prompt_texts or [])
+ prompt_end = "---\nQuery:\n" f"## INPUT:\n{query_text}\n" "## OUTPUT:\n"

- def make_text(texts):
- return "\n".join(prompt_clauses) + prompt_end
+ def make_text(texts: List[str]):
+ return "\n".join(texts) + prompt_end

  try:
  encoding = encoding_for_model(model_name)
  except KeyError:
  encoding = encoding_for_model("gpt-4")
  token_limit = get_token_limit(model_name)
- prompt = render_formatted_text(make_text, prompt_clauses, encoding, token_limit)
+ prompt = render_formatted_text(make_text, values=prompt_clauses,
+ encoding=encoding, token_limit=token_limit,
+ additional_text=system_prompt)
  logger.info(f"Prompt: {prompt}")
  response = model.prompt(prompt, system_prompt)
  yaml_str = response.text()
  logger.info(f"Response: {yaml_str}")
- return Inference(predicted_object=self._parse_yaml_payload(yaml_str))
+ predicted_object = self._parse_yaml_payload(yaml_str, strict=True)
+ if self.config.validate_results:
+ base_collection = self.training_data.base_collection
+ errs = list(base_collection.iter_validate_collection([predicted_object]))
+ if errs:
+ print(f"{iteration} // FAILED TO VALIDATE: {yaml_str}")
+ print(f"PARSED: {predicted_object}")
+ print(f"ERRORS: {errs}")
+ if iteration > MAX_ITERATIONS:
+ raise ValueError(f"Validation errors: {errs}")
+ extra_texts = [
+ "Make sure results conform to the schema. Previously you provided:\n",
+ yaml_str,
+ "\nThis was invalid.\n",
+ "Validation errors:\n",
+ ] + [self.object_to_text(e) for e in errs]
+ return self.derive(object, iteration=iteration+1, additional_prompt_texts=extra_texts)
+ return RAGInference(predicted_object=predicted_object, iterations=iteration+1, query=object)

  def _parse_yaml_payload(self, yaml_str: str, strict=False) -> Optional[OBJECT]:
  if "```" in yaml_str:
@@ -1,5 +1,5 @@
  import logging
- from typing import List, Optional, Tuple
+ from typing import List, Optional, Tuple, Any

  from pydantic import BaseModel, ConfigDict, Field

@@ -36,6 +36,7 @@ class InferenceConfig(BaseModel, extra="forbid"):
  train_test_split: Optional[Tuple[float, float]] = None
  llm_config: Optional[LLMConfig] = None
  random_seed: Optional[int] = None
+ validate_results: Optional[bool] = None

  @classmethod
  def from_file(cls, file_path: str, format: Optional[Format] = None) -> "InferenceConfig":
@@ -58,6 +59,7 @@ class Inference(BaseModel, extra="forbid"):
  """
  Result of an inference derivation.
  """
-
+ query: Optional[OBJECT] = Field(default=None, description="The query object.")
  predicted_object: OBJECT = Field(..., description="The predicted object.")
  confidence: Optional[float] = Field(default=None, description="The confidence of the prediction.", le=1.0, ge=0.0)
+ explanation: Optional[Any] = Field(default=None, description="Explanation of the prediction.")
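InferenceConfig gains validate_results, the Inference result model now records the originating query and an optional explanation, and RAGInference (added above) counts how many derive iterations were needed. When validation is enabled, the RAG engine re-prompts with the validation errors until the predicted object conforms or MAX_ITERATIONS is exceeded. A hedged sketch of enabling it; field names beyond those shown in this diff are assumed from their usage in rag_inference_engine.py:

    from linkml_store.inference.inference_config import InferenceConfig, LLMConfig

    # assumption: LLMConfig lives alongside InferenceConfig and accepts model_name
    config = InferenceConfig(
        target_attributes=["species"],
        llm_config=LLMConfig(model_name="gpt-4o-mini"),
        validate_results=True,   # re-prompt with validation errors on failure
    )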
@@ -4,7 +4,7 @@ from abc import ABC
  from dataclasses import dataclass
  from enum import Enum
  from pathlib import Path
- from typing import Optional, TextIO, Tuple, Union
+ from typing import Optional, TextIO, Tuple, Union, Any

  import pandas as pd
  from pydantic import BaseModel, ConfigDict
@@ -67,13 +67,14 @@ class CollectionSlice(BaseModel):
  # slice: Tuple[Optional[int], Optional[int]] = Field(default=(None, None))
  indices: Optional[Tuple[int, ...]] = None
  _collection: Optional[Collection] = None
+ where: Any = None

  @property
  def collection(self) -> Collection:
  if not self._collection and not self.indices:
  return self.base_collection
  if not self._collection:
- rows = self.base_collection.find({}, limit=-1).rows
+ rows = self.base_collection.rows
  subset = [rows[i] for i in self.indices]
  db = self.base_collection.parent
  subset_name = self.slice_alias
@@ -94,6 +95,7 @@ class CollectionSlice(BaseModel):
  """
  Return the slice of the collection as a dataframe.

+ :param flattened: flattned nested objects to give keys like foo.bar
  :return:
  """
  rs = self.collection.find({}, limit=-1)
@@ -12,9 +12,9 @@ from typing import IO, Any, Dict, List, Optional, TextIO, Type, Union

  import pandas as pd
  import pystow
+ import xmltodict
  import yaml
  from pydantic import BaseModel
- from tabulate import tabulate

  logger = logging.getLogger(__name__)

@@ -30,6 +30,7 @@ class Format(Enum):
  YAMLL = "yamll"
  TSV = "tsv"
  CSV = "csv"
+ XML = "xml"
  PYTHON = "python"
  PARQUET = "parquet"
  FORMATTED = "formatted"
@@ -50,6 +51,7 @@ class Format(Enum):
  ".yamll": cls.YAMLL,
  ".tsv": cls.TSV,
  ".csv": cls.CSV,
+ ".xml": cls.XML,
  ".py": cls.PYTHON,
  ".parquet": cls.PARQUET,
  ".pq": cls.PARQUET,
@@ -124,6 +126,8 @@ def process_file(
  delimiter = "\t" if format == Format.TSV else ","
  reader = csv.DictReader(f, delimiter=delimiter)
  objs = list(reader)
+ elif format == Format.XML:
+ objs = xmltodict.parse(f.read())
  elif format == Format.PARQUET:
  import pyarrow.parquet as pq

@@ -284,6 +288,7 @@ def render_output(
  elif format == Format.PYTHON:
  return str(data)
  elif format == Format.TABLE:
+ from tabulate import tabulate
  return tabulate(pd.DataFrame(data), headers="keys", tablefmt="psql")
  elif format == Format.YAML:
  if isinstance(data, list):
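The new XML support above relies on xmltodict.parse, which returns a nested dict keyed by element names, so an XML input loads as a single object rather than a list of rows. A quick illustration of what the Format.XML branch hands back:

    import xmltodict

    doc = xmltodict.parse("<person><name>Ada</name><age>36</age></person>")
    print(doc["person"]["name"])  # -> Ada
    # note: leaf values come back as strings ("36") unless post-processed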
@@ -1,6 +1,10 @@
- from typing import Callable, List, Optional
+ import logging
+ from typing import Callable, List, Optional, TYPE_CHECKING

- from tiktoken import Encoding
+ if TYPE_CHECKING:
+ import tiktoken
+
+ logger = logging.getLogger(__name__)

  MODEL_TOKEN_MAPPING = {
  "gpt-4o-mini": 128_000,
@@ -40,7 +44,7 @@ MODEL_TOKEN_MAPPING = {
  def render_formatted_text(
  render_func: Callable,
  values: List[str],
- encoding: Encoding,
+ encoding: "tiktoken.Encoding",
  token_limit: int,
  additional_text: Optional[str] = None,
  ) -> str:
@@ -67,6 +71,7 @@ def render_formatted_text(
  if additional_text:
  token_limit -= len(encoding.encode(additional_text))
  text_length = len(encoding.encode(text))
+ logger.debug(f"Encoding length: {text_length} (original: {len(text)})")
  if text_length <= token_limit:
  return text
  if not values:
@@ -0,0 +1,165 @@
+ import logging
+ from typing import List, Tuple
+
+ import numpy as np
+
+ logger = logging.getLogger(__name__)
+
+
+ LOL = List[List[float]]
+
+ def pairwise_cosine_similarity(vector1: np.array, vector2: np.array) -> float:
+ """
+ Calculate the cosine similarity between two vectors.
+
+ >>> v100 = np.array([1, 0, 0])
+ >>> v010 = np.array([0, 1, 0])
+ >>> v001 = np.array([0, 0, 1])
+ >>> v011 = np.array([0, 1, 1])
+ >>> pairwise_cosine_similarity(v100, v010)
+ 0.0
+ >>> pairwise_cosine_similarity(v100, v001)
+ 0.0
+ >>> pairwise_cosine_similarity(v010, v001)
+ 0.0
+ >>> pairwise_cosine_similarity(v100, v100)
+ 1.0
+ >>> f"{pairwise_cosine_similarity(v010, v011):0.3f}"
+ '0.707'
+
+ :param vector1:
+ :param vector2:
+ :return:
+ """
+ dot_product = np.dot(vector1, vector2)
+ norm1 = np.linalg.norm(vector1)
+ norm2 = np.linalg.norm(vector2)
+ return dot_product / (norm1 * norm2)
+
+
+ def compute_cosine_similarity_matrix(list1: LOL, list2: LOL) -> np.ndarray:
+ """
+ Compute cosine similarity between two lists of vectors.
+
+ Result is a two column vector sim[ROW][COL] where ROW is from list1 and COL is from list2.
+
+ :param list1:
+ :param list2:
+ :return:
+ """
+ # Convert lists to numpy arrays
+ matrix1 = np.array(list1)
+ matrix2 = np.array(list2)
+
+ # Normalize the vectors in both matrices
+ matrix1_norm = matrix1 / np.linalg.norm(matrix1, axis=1)[:, np.newaxis]
+ matrix2_norm = matrix2 / np.linalg.norm(matrix2, axis=1)[:, np.newaxis]
+
+ # Compute dot products (resulting in cosine similarity values)
+ cosine_similarity_matrix = np.dot(matrix1_norm, matrix2_norm.T)
+
+ return cosine_similarity_matrix
+
+
+ def top_matches(cosine_similarity_matrix: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
+ """
+ Find the top match for each row in the cosine similarity matrix.
+
+ :param cosine_similarity_matrix:
+ :return:
+ """
+ # Find the index of the maximum value in each row
+ top_match_indices = np.argmax(cosine_similarity_matrix, axis=1)
+
+ # Find the maximum similarity value in each row
+ top_match_values = np.amax(cosine_similarity_matrix, axis=1)
+
+ return top_match_indices, top_match_values
+
+
+ def top_n_matches(
+ cosine_similarity_matrix: np.ndarray, n: int = 10
+ ) -> Tuple[np.ndarray, np.ndarray]:
+ # Find the indices that would sort each row in descending order
+ sorted_indices = np.argsort(-cosine_similarity_matrix, axis=1)
+
+ # Take the first n indices from the sorted indices to get the top n matches
+ top_n_indices = sorted_indices[:, :n]
+
+ # Take the first n values from the sorted values to get the top n match values
+ top_n_values = -np.sort(-cosine_similarity_matrix, axis=1)[:, :n]
+
+ return top_n_indices, top_n_values
+
+
+ def mmr_diversified_search(
+ query_vector: np.ndarray, document_vectors: List[np.ndarray], relevance_factor=0.5, top_n=None
+ ) -> List[int]:
+ """
+ Perform diversified search using Maximal Marginal Relevance (MMR).
+
+ :param query_vector: The vector representing the query.
+ :param document_vectors: The vectors representing the documents.
+ :param relevance_factor: The balance parameter between relevance and diversity.
+ :param top_n: The number of results to return. If None, return all.
+ :return: A list of indices representing the diversified order of documents.
+ """
+ if top_n is None:
+ # If no specific number of results is specified, return all
+ top_n = len(document_vectors)
+
+ if top_n == 0:
+ return []
+
+ # Calculate cosine similarities between query and all documents
+ norms_query = np.linalg.norm(query_vector)
+ norms_docs = np.linalg.norm(document_vectors, axis=1)
+ similarities = np.dot(document_vectors, query_vector) / (norms_docs * norms_query)
+
+ # Initialize set of selected indices and results list
+ selected_indices = set()
+ result_indices = []
+
+ # Diversified search loop
+ for _ in range(top_n):
+ max_mmr = float("-inf")
+ best_index = None
+
+ # Loop over all documents
+ for idx, _doc_vector in enumerate(document_vectors):
+ if idx not in selected_indices:
+ relevance = relevance_factor * similarities[idx]
+ diversity = 0
+
+ # Penalize based on similarity to already selected documents
+ if selected_indices:
+ max_sim_to_selected = max(
+ [
+ np.dot(document_vectors[idx], document_vectors[s])
+ / (
+ np.linalg.norm(document_vectors[idx])
+ * np.linalg.norm(document_vectors[s])
+ )
+ for s in selected_indices
+ ]
+ )
+ diversity = (1 - relevance_factor) * max_sim_to_selected
+
+ mmr_score = relevance - diversity
+
+ # Update best MMR score and index
+ if mmr_score > max_mmr:
+ max_mmr = mmr_score
+ best_index = idx
+
+ # Add the best document to the result and mark it as selected
+ if best_index is None:
+ logger.warning(f"No best index found over {len(document_vectors)} documents.")
+ continue
+ result_indices.append(best_index)
+ selected_indices.add(best_index)
+
+ return result_indices
+
+
+
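The new vector_utils module backs the MMR support used by Indexer.search: mmr_diversified_search orders documents by Maximal Marginal Relevance, at each step picking the document that maximizes relevance_factor * sim(query, doc) minus (1 - relevance_factor) * max similarity to the documents already selected. A toy usage sketch of the module (vectors invented for illustration):

    import numpy as np
    from linkml_store.utils.vector_utils import (
        mmr_diversified_search,
        pairwise_cosine_similarity,
    )

    query = np.array([1.0, 0.0])
    docs = [
        np.array([1.0, 0.0]),  # identical to the query
        np.array([0.9, 0.1]),  # near-duplicate of the first document
        np.array([0.0, 1.0]),  # orthogonal; adds diversity
    ]

    order = mmr_diversified_search(query, docs, relevance_factor=0.3, top_n=3)
    # with a low relevance_factor the near-duplicate is demoted below the orthogonal vector
    print(order, [round(pairwise_cosine_similarity(query, docs[i]), 3) for i in order])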
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.3
  Name: linkml-store
- Version: 0.2.1
+ Version: 0.2.4
  Summary: linkml-store
  License: MIT
  Author: Author 1
@@ -12,9 +12,11 @@ Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Provides-Extra: all
  Provides-Extra: analytics
  Provides-Extra: app
- Provides-Extra: chromadb
+ Provides-Extra: bigquery
  Provides-Extra: fastapi
  Provides-Extra: frictionless
  Provides-Extra: h5py
@@ -29,25 +31,26 @@ Provides-Extra: scipy
  Provides-Extra: tests
  Provides-Extra: validation
  Requires-Dist: black (>=24.0.0) ; extra == "tests"
- Requires-Dist: chromadb ; extra == "chromadb"
  Requires-Dist: click
  Requires-Dist: duckdb (>=0.10.1)
  Requires-Dist: duckdb-engine (>=0.11.2)
  Requires-Dist: fastapi ; extra == "fastapi"
  Requires-Dist: frictionless ; extra == "frictionless"
  Requires-Dist: gcsfs ; extra == "ibis"
+ Requires-Dist: google-cloud-bigquery ; extra == "bigquery"
  Requires-Dist: h5py ; extra == "h5py"
  Requires-Dist: ibis-framework[duckdb,examples] (>=9.3.0) ; extra == "ibis"
  Requires-Dist: jinja2 (>=3.1.4,<4.0.0)
  Requires-Dist: jsonlines (>=4.0.0,<5.0.0)
+ Requires-Dist: jsonpatch (>=1.33,<2.0)
  Requires-Dist: linkml (>=1.8.0) ; extra == "validation"
  Requires-Dist: linkml-runtime (>=1.8.0)
  Requires-Dist: linkml_map ; extra == "map"
  Requires-Dist: linkml_renderer ; extra == "renderer"
- Requires-Dist: llm ; extra == "llm"
+ Requires-Dist: llm ; extra == "llm" or extra == "all"
  Requires-Dist: matplotlib ; extra == "analytics"
  Requires-Dist: multipledispatch ; extra == "ibis"
- Requires-Dist: neo4j ; extra == "neo4j"
+ Requires-Dist: neo4j ; extra == "neo4j" or extra == "all"
  Requires-Dist: networkx ; extra == "neo4j"
  Requires-Dist: pandas (>=2.2.1) ; extra == "analytics"
  Requires-Dist: plotly ; extra == "analytics"
@@ -62,8 +65,10 @@ Requires-Dist: scipy ; extra == "scipy"
  Requires-Dist: seaborn ; extra == "analytics"
  Requires-Dist: sqlalchemy
  Requires-Dist: streamlit (>=1.32.2,<2.0.0) ; extra == "app"
+ Requires-Dist: tabulate
  Requires-Dist: tiktoken ; extra == "llm"
  Requires-Dist: uvicorn ; extra == "fastapi"
+ Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
  Description-Content-Type: text/markdown

  # linkml-store
@@ -1,8 +1,8 @@
  linkml_store/__init__.py,sha256=jlU6WOUAn8cKIhzbTULmBTWpW9gZdEt7q_RI6KZN1bY,118
  linkml_store/api/__init__.py,sha256=3CelcFEFz0y3MkQAzhQ9JxHIt1zFk6nYZxSmYTo8YZE,226
- linkml_store/api/client.py,sha256=3klBXenQVbLjNQF3WmYfjASt3zvKOfWaCNp5aJM81Ec,12034
- linkml_store/api/collection.py,sha256=98qUYKVJOEzC9Sl9iBqxdBWnm_4Q8UT9r5UPRb4UoAU,39300
- linkml_store/api/config.py,sha256=71pxQ5jM-ETxJWU7CzmKjsH6IEJUMP5sml381u9TYVk,5654
+ linkml_store/api/client.py,sha256=wFVgl1NUovaKLqNVUEt9dsnoIzjzqFvktJVncAupdE4,12362
+ linkml_store/api/collection.py,sha256=CGvWxH7HRhjDt9Cp3SGdMqyhYx7Q4fRKUtAJV74_l0g,39711
+ linkml_store/api/config.py,sha256=pOz210JIwkEEXtfjcsZBp1UEedkBu8RkH62Qa1b4exI,5777
  linkml_store/api/database.py,sha256=nvae8jnOZsQIFCsl_lRBnKcvrpJg4A10ujIKGeMyUS8,29350
  linkml_store/api/queries.py,sha256=tx9fgGY5fC_2ZbIvg4BqTK_MXJwA_DI4mxr8HdQ6Vos,2075
  linkml_store/api/stores/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -15,7 +15,7 @@ linkml_store/api/stores/duckdb/duckdb_database.py,sha256=GH9bcOfHpNp6r-Eu1C3W0xu
  linkml_store/api/stores/duckdb/mappings.py,sha256=tDce3W1Apwammhf4LS6cRJ0m4NiJ0eB7vOI_4U5ETY8,148
  linkml_store/api/stores/filesystem/__init__.py,sha256=KjvCjdttwqMHNeGyL-gr59zRz0--HFEWWUNNCJ5hITs,347
  linkml_store/api/stores/filesystem/filesystem_collection.py,sha256=9gqY2KRZsn_RWk4eKkxFd3_wcxs5YaXvcBI7GGJBMGE,6751
- linkml_store/api/stores/filesystem/filesystem_database.py,sha256=sV-lueyrh3R6edyWkwN6qKa7yjPc8PIcF1rxgox6oA4,2875
+ linkml_store/api/stores/filesystem/filesystem_database.py,sha256=e9hSGoaOxr_sG_RhjgzV_yvdQ_xbHHXHJDtufWzAX4E,2883
  linkml_store/api/stores/hdf5/__init__.py,sha256=l4cIh3v7P0nPbwGIsfuCMD_serQ8q8c7iuUA9W2Jb4o,97
  linkml_store/api/stores/hdf5/hdf5_collection.py,sha256=mnpLMYehn3PuaIjp2dXrIWu8jh-bdQ84X2Ku83jMdEY,3805
  linkml_store/api/stores/hdf5/hdf5_database.py,sha256=EZbjrpaqiNDEFvoD5dZNcGBXA8z6HRNL81emueTZWNw,2714
@@ -30,30 +30,30 @@ linkml_store/api/stores/solr/solr_collection.py,sha256=ZlxC3JbVaHfSA4HuTeJTsp6qe
  linkml_store/api/stores/solr/solr_database.py,sha256=TFjqbY7jAkdrhAchbNg0E-mChSP7ogNwFExslbvX7Yo,2877
  linkml_store/api/stores/solr/solr_utils.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  linkml_store/api/types.py,sha256=3aIQtDFMvsSmjuN5qrR2vNK5sHa6yzD_rEOPA6tHwvg,176
- linkml_store/cli.py,sha256=6pcVHM_hNH7EicleoCkjMVAbrGFZu8V_k2mv3aX0SH8,27703
+ linkml_store/cli.py,sha256=bWbWQita8KCBjzovBRzQqHtjbRrf7Ttxq0Fe8zrDuds,30235
  linkml_store/constants.py,sha256=x4ZmDsfE9rZcL5WpA93uTKrRWzCD6GodYXviVzIvR38,112
  linkml_store/graphs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  linkml_store/graphs/graph_map.py,sha256=bYRxv8n1YPnFqE9d6JKNmRawb8EAhsPlHhBue0gvtZE,712
  linkml_store/index/__init__.py,sha256=6SQzDe-WZSSqbGNsbCDfyPTyz0s9ISDKw1dm9xgQuT4,1396
  linkml_store/index/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- linkml_store/index/implementations/llm_indexer.py,sha256=y1xvfUm_rl4UEiWJbsUsEnTCma98XRB9C1XOnuaAv5o,5474
+ linkml_store/index/implementations/llm_indexer.py,sha256=ja7UXhQj7F0g6HiRIJ8EBPuM86nOgr49jkh7eh_nCHs,5644
  linkml_store/index/implementations/simple_indexer.py,sha256=KnkFJtXTHnwjhD_D6ZK2rFhBID1dgCedcOVPEWAY2NU,1282
- linkml_store/index/indexer.py,sha256=3P-VtIEqitnMoaFjcXIIosoU7tJInop1Qq39QRbcT-8,7107
+ linkml_store/index/indexer.py,sha256=e5dsjh2wjOTDRsfClKJAFTbcK1UC7BOGkUCOfDg9omI,7635
  linkml_store/inference/__init__.py,sha256=b8NAFNZjOYU_8gOvxdyCyoiHOOl5Ai2ckKs1tv7ZkkY,342
  linkml_store/inference/evaluation.py,sha256=YDFYaEu2QLSfFq4oyARrnKfTiPLtNF8irhhspgVDfdY,6013
  linkml_store/inference/implementations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- linkml_store/inference/implementations/rag_inference_engine.py,sha256=zmcbxmVZTm8ViSp7WFs8KHRNbzWXdZQl7J7VvcIjDyU,9049
+ linkml_store/inference/implementations/rag_inference_engine.py,sha256=mN7YQI-BeZglsAnZnNIuAj-Nxg1su5efNaohooEmNmM,10622
  linkml_store/inference/implementations/rule_based_inference_engine.py,sha256=0IEY_fsHJPJy6QKbYQU_qE87RRnPOXQxPuJKXCQG8jU,6250
  linkml_store/inference/implementations/sklearn_inference_engine.py,sha256=Sdi7CoRK3qoLJu3prgLy1Ck_zQ1gHWRKFybHe7XQ4_g,13192
- linkml_store/inference/inference_config.py,sha256=SbAlgQDRCWawWohe0IWX_Kvy-DFeaLYsN1HqrmLvc0k,2052
- linkml_store/inference/inference_engine.py,sha256=l2UB6cA0rW7a9qyiv8JF5Nzj8nRHGX_yqMYbiDnY1Qc,7055
+ linkml_store/inference/inference_config.py,sha256=EFGdigxWsfTPREbgqyJVRShN0JktCEmFLLoECrLfXSg,2282
+ linkml_store/inference/inference_engine.py,sha256=IxQIOgmXCDI8ilCGtoaVA_1wFROUg4uH1_yGbX78N2U,7139
  linkml_store/inference/inference_engine_registry.py,sha256=6o66gvBYBwdeAKm62zqqvfaBlcopVP_cla3L6uXGsHA,3015
  linkml_store/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  linkml_store/utils/change_utils.py,sha256=O2rvSvgTKB60reLLz9mX5OWykAA_m93bwnUh5ZWa0EY,471
  linkml_store/utils/file_utils.py,sha256=rQ7-XpmI6_Kx_dhEnI98muFRr0MmgI_kZ_9cgJBf_0I,1411
- linkml_store/utils/format_utils.py,sha256=airJ2_tFsr0dTIbSHT5y0TZbDrvBBV4_qThFPFY5k8U,10925
+ linkml_store/utils/format_utils.py,sha256=sjpdJJ8Ww2ilm03mQt_v4QkZvQMymqUeTiPS3U1ViKM,11067
  linkml_store/utils/io.py,sha256=JHUrWDtlZC2jtN_PQZ4ypdGIyYlftZEN3JaCvEPs44w,884
- linkml_store/utils/llm_utils.py,sha256=3jRFUtEywoKdomKb3aCH1GdI9hQJOQo8Udb3Jy4M-Xw,2885
+ linkml_store/utils/llm_utils.py,sha256=0lvR_lBSDSuP-0Eum16QBUsSv8sWfDjZPz_MnDSPvn0,3048
  linkml_store/utils/mongodb_utils.py,sha256=Rl1YmMKs1IXwSsJIViSDChbi0Oer5cBnMmjka2TeQS8,4665
  linkml_store/utils/neo4j_utils.py,sha256=y3KPmDZ8mQmePgg0lUeKkeKqzEr2rV226xxEtHc5pRg,1266
  linkml_store/utils/object_utils.py,sha256=Vib-5Ip2DlRVKLZpU-008ZZI813-vfKVSCY0TksRenM,6293
@@ -64,6 +64,7 @@ linkml_store/utils/schema_utils.py,sha256=iJiZxo5NGr7v87h4DV6V9DrDOZHSswMRuf0N4V
  linkml_store/utils/sklearn_utils.py,sha256=itPpcrsbbyOazdjmivaaZ1lyZeytm0a0hJ2AS8ziUgg,7590
  linkml_store/utils/sql_utils.py,sha256=T41w_vsc3SauTJQkDMwid_nOtKW1YOKyUuaxEf470hk,5938
  linkml_store/utils/stats_utils.py,sha256=4KqBb1bqDgAmq-1fJLLu5B2paPgoZZc3A-gnyVam4bI,1799
+ linkml_store/utils/vector_utils.py,sha256=Q1RlpDzavJAM9-H2m2XNU5BNUcfZkpIWeEZii2hK0PQ,5449
  linkml_store/webapi/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  linkml_store/webapi/html/__init__.py,sha256=hwp5eeBJKH65Bvv1x9Z4vsT1tLSYtb9Dq4I9r1kL1q0,69
  linkml_store/webapi/html/base.html.j2,sha256=hoiV2uaSxxrQp7VuAZBOHueH7czyJMYcPBRN6dZFYhk,693
@@ -72,8 +73,8 @@ linkml_store/webapi/html/database_details.html.j2,sha256=qtXdavbZb0mohiObI9dvJtk
  linkml_store/webapi/html/databases.html.j2,sha256=a9BCWQYfPeFhdUd31CWhB0yWhTIFXQayO08JgjyqKoc,294
  linkml_store/webapi/html/generic.html.j2,sha256=KtLaO2HUEF2Opq-OwHKgRKetNWe8IWc6JuIkxRPsywk,1018
  linkml_store/webapi/main.py,sha256=B0Da575kKR7X88N9ykm99Dem8FyBAW9f-w3A_JwUzfw,29165
- linkml_store-0.2.1.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
- linkml_store-0.2.1.dist-info/METADATA,sha256=ERSRCW1gMtcuLoWX-jKKfpgz10BBIUrGw-TvwXW3o-c,6977
- linkml_store-0.2.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
- linkml_store-0.2.1.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
- linkml_store-0.2.1.dist-info/RECORD,,
+ linkml_store-0.2.4.dist-info/LICENSE,sha256=77mDOslUnalYnuq9xQYZKtIoNEzcH9mIjvWHOKjamnE,1086
+ linkml_store-0.2.4.dist-info/METADATA,sha256=PJX-_TSPk6WDXDCmvuFDUb5649ECQc2N6zP4pWqhBvU,7204
+ linkml_store-0.2.4.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
+ linkml_store-0.2.4.dist-info/entry_points.txt,sha256=gWxVsHqx-t-UKWFHFzawQTvs4is4vC1rCF5AeKyqWWk,101
+ linkml_store-0.2.4.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: poetry-core 1.9.0
+ Generator: poetry-core 2.0.1
  Root-Is-Purelib: true
  Tag: py3-none-any