linkml-store 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of linkml-store might be problematic.

Files changed (35)
  1. linkml_store/api/client.py +76 -11
  2. linkml_store/api/collection.py +223 -40
  3. linkml_store/api/config.py +59 -9
  4. linkml_store/api/database.py +45 -27
  5. linkml_store/api/stores/duckdb/duckdb_collection.py +21 -3
  6. linkml_store/api/stores/duckdb/duckdb_database.py +36 -3
  7. linkml_store/api/stores/filesystem/filesystem_collection.py +13 -4
  8. linkml_store/api/stores/filesystem/filesystem_database.py +10 -1
  9. linkml_store/api/stores/mongodb/mongodb_collection.py +80 -34
  10. linkml_store/api/stores/mongodb/mongodb_database.py +1 -36
  11. linkml_store/api/stores/solr/solr_collection.py +4 -4
  12. linkml_store/cli.py +44 -18
  13. linkml_store/index/__init__.py +21 -5
  14. linkml_store/index/implementations/llm_indexer.py +2 -1
  15. linkml_store/index/indexer.py +20 -4
  16. linkml_store/utils/file_utils.py +37 -0
  17. linkml_store/utils/format_utils.py +69 -8
  18. linkml_store/utils/pandas_utils.py +40 -0
  19. linkml_store/utils/schema_utils.py +23 -0
  20. linkml_store/utils/sql_utils.py +2 -1
  21. linkml_store/webapi/__init__.py +0 -0
  22. linkml_store/webapi/html/__init__.py +3 -0
  23. linkml_store/webapi/html/base.html.j2 +24 -0
  24. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  25. linkml_store/webapi/html/database_details.html.j2 +16 -0
  26. linkml_store/webapi/html/databases.html.j2 +14 -0
  27. linkml_store/webapi/html/generic.html.j2 +46 -0
  28. linkml_store/webapi/main.py +572 -0
  29. linkml_store-0.1.11.dist-info/METADATA +171 -0
  30. linkml_store-0.1.11.dist-info/RECORD +60 -0
  31. {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/entry_points.txt +1 -0
  32. linkml_store-0.1.9.dist-info/METADATA +0 -61
  33. linkml_store-0.1.9.dist-info/RECORD +0 -49
  34. {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/LICENSE +0 -0
  35. {linkml_store-0.1.9.dist-info → linkml_store-0.1.11.dist-info}/WHEEL +0 -0
linkml_store/cli.py CHANGED
@@ -16,6 +16,7 @@ from linkml_store.index.implementations.simple_indexer import SimpleIndexer
 from linkml_store.index.indexer import Indexer
 from linkml_store.utils.format_utils import Format, guess_format, load_objects, render_output, write_output
 from linkml_store.utils.object_utils import object_path_update
+from linkml_store.utils.pandas_utils import facet_summary_to_dataframe_unmelted
 
 index_type_option = click.option(
     "--index-type",
@@ -87,6 +88,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
 @click.option("--set", help="Metadata settings in the form PATHEXPR=value", multiple=True)
 @click.option("-v", "--verbose", count=True)
 @click.option("-q", "--quiet/--no-quiet")
+@click.option("--base-dir", "-B", help="Base directory for the client configuration")
 @click.option(
     "--stacktrace/--no-stacktrace",
     default=False,
@@ -94,7 +96,7 @@ include_internal_option = click.option("--include-internal/--no-include-internal
     help="If set then show full stacktrace on error",
 )
 @click.pass_context
-def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set):
+def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection, config, set, **kwargs):
     """A CLI for interacting with the linkml-store."""
     if not stacktrace:
         sys.tracebacklimit = 0
@@ -117,7 +119,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     if quiet:
         logger.setLevel(logging.ERROR)
     ctx.ensure_object(dict)
-    client = Client().from_config(config) if config else Client()
+    client = Client().from_config(config, **kwargs) if config else Client()
     settings = ContextSettings(client=client, database_name=database, collection_name=collection)
     ctx.obj["settings"] = settings
     # DEPRECATED
@@ -150,7 +152,7 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
     # raise ValueError("Collection must be specified if there are multiple collections.")
     if settings.database and settings.database.list_collections():
         collection = settings.database.list_collections()[0]
-        settings.collection_name = collection.name
+        settings.collection_name = collection.alias
 
 
 @cli.command()
@@ -159,7 +161,15 @@ def cli(ctx, verbose: int, quiet: bool, stacktrace: bool, database, collection,
 @click.option("--object", "-i", multiple=True, help="Input object as YAML")
 @click.pass_context
 def insert(ctx, files, object, format):
-    """Insert objects from files (JSON, YAML, TSV) into the specified collection."""
+    """Insert objects from files (JSON, YAML, TSV) into the specified collection.
+
+    Using a configuration:
+
+        linkml-store -C config.yaml -c genes insert data/genes/*.json
+
+    Note: if you don't provide a schema this will be inferred, but it is
+    usually better to provide an explicit schema
+    """
     settings = ctx.obj["settings"]
     collection = settings.collection
     if not collection:
@@ -172,15 +182,15 @@ def insert(ctx, files, object, format):
             objects = load_objects(file_path, format=format)
         else:
             objects = load_objects(file_path)
-        logger.info(f"Inserting {len(objects)} objects from {file_path} into collection '{collection.name}'.")
+        logger.info(f"Inserting {len(objects)} objects from {file_path} into collection '{collection.alias}'.")
         collection.insert(objects)
-        click.echo(f"Inserted {len(objects)} objects from {file_path} into collection '{collection.name}'.")
+        click.echo(f"Inserted {len(objects)} objects from {file_path} into collection '{collection.alias}'.")
     if object:
         for object_str in object:
             logger.info(f"Parsing: {object_str}")
             objects = yaml.safe_load(object_str)
             collection.insert(objects)
-            click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.name}'.")
+            click.echo(f"Inserted {len(objects)} objects from {object_str} into collection '{collection.alias}'.")
     collection.commit()
 
 
@@ -316,7 +326,7 @@ def query(ctx, where, limit, output_type, output):
     """
     collection = ctx.obj["settings"].collection
     where_clause = yaml.safe_load(where) if where else None
-    query = Query(from_table=collection.name, where_clause=where_clause, limit=limit)
+    query = Query(from_table=collection.alias, where_clause=where_clause, limit=limit)
     result = collection.query(query)
     output_data = render_output(result.rows, output_type)
     if output:
@@ -333,7 +343,7 @@ def query(ctx, where, limit, output_type, output):
 def list_collections(ctx, **kwargs):
     db = ctx.obj["settings"].database
     for collection in db.list_collections(**kwargs):
-        click.echo(collection.name)
+        click.echo(collection.alias)
         click.echo(render_output(collection.metadata))
 
 
@@ -343,8 +353,9 @@ def list_collections(ctx, **kwargs):
 @click.option("--output-type", "-O", type=format_choice, default="json", help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
 @click.option("--columns", "-S", help="Columns to facet on")
+@click.option("--wide/--no-wide", "-U/--no-U", default=False, show_default=True, help="Wide table")
 @click.pass_context
-def fq(ctx, where, limit, columns, output_type, output):
+def fq(ctx, where, limit, columns, output_type, wide, output):
     """
     Query facets from the specified collection.
 
@@ -371,11 +382,22 @@
             return "+".join([str(x) for x in key])
         return key
 
-    count_dict = {}
-    for key, value in results.items():
-        value_as_dict = {_untuple(v[0:-1]): v[-1] for v in value}
-        count_dict[_untuple(key)] = value_as_dict
-    output_data = render_output(count_dict, output_type)
+    if wide:
+        results_obj = facet_summary_to_dataframe_unmelted(results)
+    else:
+        if output_type == Format.PYTHON.value:
+            results_obj = results
+        elif output_type in [Format.TSV.value, Format.CSV.value]:
+            results_obj = []
+            for fc, data in results.items():
+                for v, c in data:
+                    results_obj.append({"facet": fc, "value": v, "count": c})
+        else:
+            results_obj = {}
+            for key, value in results.items():
+                value_as_dict = {_untuple(v[0:-1]): v[-1] for v in value}
+                results_obj[_untuple(key)] = value_as_dict
+    output_data = render_output(results_obj, output_type)
     if output:
         with open(output, "w") as f:
             f.write(output_data)
@@ -395,14 +417,17 @@ def _get_index(index_type=None, **kwargs) -> Indexer:
 @click.option("--where", "-w", type=click.STRING, help="WHERE clause for the query")
 @click.option("--output-type", "-O", type=format_choice, default=Format.FORMATTED.value, help="Output format")
 @click.option("--output", "-o", type=click.Path(), help="Output file path")
+@click.option(
+    "--limit", "-l", default=-1, show_default=True, type=click.INT, help="Maximum number of results to return"
+)
 @click.pass_context
-def describe(ctx, where, output_type, output):
+def describe(ctx, where, output_type, output, limit):
     """
     Describe the collection schema.
     """
     where_clause = yaml.safe_load(where) if where else None
     collection = ctx.obj["settings"].collection
-    df = collection.find(where_clause, limit=1).rows_dataframe
+    df = collection.find(where_clause, limit=limit).rows_dataframe
     write_output(df.describe(include="all").transpose(), output_type, target=output)
 
 
@@ -460,7 +485,7 @@ def search(ctx, search_term, where, limit, index_type, output_type, output, auto
     """Search objects in the specified collection."""
     collection = ctx.obj["settings"].collection
     ix = get_indexer(index_type)
-    logger.info(f"Attaching index to collection {collection.name}: {ix.model_dump()}")
+    logger.info(f"Attaching index to collection {collection.alias}: {ix.model_dump()}")
     collection.attach_indexer(ix, auto_index=auto_index)
     result = collection.search(search_term, where=where, limit=limit)
     output_data = render_output([{"score": row[0], **row[1]} for row in result.ranked_rows], output_type)
@@ -490,6 +515,7 @@ def indexes(ctx):
 def validate(ctx, output_type, output):
     """Validate objects in the specified collection."""
     collection = ctx.obj["settings"].collection
+    logger.info(f"Validating collection {collection.alias}")
     validation_results = [json_dumper.to_dict(x) for x in collection.iter_validate_collection()]
     output_data = render_output(validation_results, output_type)
     if output:
linkml_store/index/__init__.py CHANGED
@@ -1,3 +1,14 @@
+"""
+Indexers package.
+
+Indexers allow indexes to be added to existing :class:`Collection` objects.
+
+Current two are supported:
+
+* simple: :class:`SimpleIndexer`
+* llm: :class:`LLMIndexer`
+"""
+
 from typing import Type
 
 from linkml_store.index.implementations.llm_indexer import LLMIndexer
@@ -14,7 +25,7 @@ def get_indexer_class(name: str) -> Type[Indexer]:
     """
     Get an indexer class by name.
 
-    :param name: the name of the indexer
+    :param name: the name of the indexer (simple, llm, ...)
     :return: the indexer class
     """
     if name not in INDEXER_CLASSES:
@@ -22,16 +33,21 @@ def get_indexer_class(name: str) -> Type[Indexer]:
     return INDEXER_CLASSES[name]
 
 
-def get_indexer(name: str, **kwargs) -> Indexer:
+def get_indexer(index_type: str, **kwargs) -> Indexer:
     """
     Get an indexer by name.
 
-    :param name: the name of the indexer
+    >>> simple_indexer = get_indexer("simple")
+    >>> llm_indexer = get_indexer("llm")
+
+    :param name: the name of the indexer (simple, llm, ...)
     :param kwargs: additional arguments to pass to the indexer
     :return: the indexer
     """
     kwargs = {k: v for k, v in kwargs.items() if v is not None}
-    cls = get_indexer_class(name)
-    kwargs["name"] = name
+    cls = get_indexer_class(index_type)
+    kwargs["index_type"] = index_type
     indexer = cls(**kwargs)
+    if not indexer.name:
+        indexer.name = index_type
    return indexer
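With this change an indexer's name is decoupled from its type and merely defaults to it. A sketch of the resulting behavior, based on the code above (the explicit name is illustrative):

    from linkml_store.index import get_indexer

    ix = get_indexer("simple")
    assert ix.index_type == "simple"
    assert ix.name == "simple"  # name defaults to index_type

    named = get_indexer("simple", name="my_index")
    assert named.name == "my_index"  # an explicit name is preserved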
linkml_store/index/implementations/llm_indexer.py CHANGED
@@ -74,7 +74,7 @@ class LLMIndexer(Indexer):
 
         embeddings_client = Client()
         config = CollectionConfig(
-            name=coll_name,
+            alias=coll_name,
             type="Embeddings",
             attributes={
                 "text": {"range": "string"},
@@ -116,6 +116,7 @@ class LLMIndexer(Indexer):
                 embeddings_collection.insert(
                     {"text": uncached_texts[i], "embedding": embeddings[index], "model_id": model_id}
                 )
+            embeddings_collection.commit()
         else:
             logger.info(f"Embedding {len(texts)} texts")
             embeddings = model.embed_multi(texts)
linkml_store/index/indexer.py CHANGED
@@ -11,11 +11,22 @@ logger = logging.getLogger(__name__)
 
 
 class TemplateSyntaxEnum(str, Enum):
+    """
+    Template syntax types.
+    """
+
     jinja2 = "jinja2"
     fstring = "fstring"
 
 
-def cosine_similarity(vector1, vector2):
+def cosine_similarity(vector1, vector2) -> float:
+    """
+    Calculate the cosine similarity between two vectors
+
+    :param vector1:
+    :param vector2:
+    :return:
+    """
     dot_product = np.dot(vector1, vector2)
     norm1 = np.linalg.norm(vector1)
     norm2 = np.linalg.norm(vector2)
@@ -24,10 +35,11 @@
 
 class Indexer(BaseModel):
     """
-    An index operates on a collection in order to search for objects.
+    An indexer operates on a collection in order to search for objects.
     """
 
     name: Optional[str] = None
+    index_type: Optional[str] = None
     index_function: Optional[Callable] = None
     distance_function: Optional[Callable] = None
     index_attributes: Optional[List[str]] = None
@@ -93,13 +105,17 @@
         if "{%" in self.text_template or "{{" in self.text_template:
             logger.info("Detected Jinja2 syntax in text template")
             syntax = TemplateSyntaxEnum.jinja2
-        if syntax and syntax == TemplateSyntaxEnum.jinja2:
+        if not syntax:
+            syntax = TemplateSyntaxEnum.fstring
+        if syntax == TemplateSyntaxEnum.jinja2:
            from jinja2 import Template

            template = Template(self.text_template)
            return template.render(**obj)
-        else:
+        elif syntax == TemplateSyntaxEnum.fstring:
            return self.text_template.format(**obj)
+        else:
+            raise NotImplementedError(f"Cannot handle template syntax: {syntax}")
     return str(obj)
 
 def search(
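The dispatch above is purely syntactic: any template containing "{%" or "{{" is treated as Jinja2, and everything else falls back to str.format. The same rule, reproduced standalone (field names are illustrative):

    from jinja2 import Template

    text_template = "{{ symbol }}: {{ description }}"
    obj = {"symbol": "TP53", "description": "tumor suppressor"}

    # mirror the indexer's detection rule
    if "{%" in text_template or "{{" in text_template:
        rendered = Template(text_template).render(**obj)  # jinja2 path
    else:
        rendered = text_template.format(**obj)  # fstring path
    print(rendered)  # TP53: tumor suppressor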
linkml_store/utils/file_utils.py ADDED
@@ -0,0 +1,37 @@
+import logging
+import shutil
+import tempfile
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+# Set up logging
+logger = logging.getLogger(__name__)
+
+
+def safe_remove_directory(dir_path: Path, no_backup: bool = False) -> Optional[Path]:
+    # Ensure the directory exists
+    if not dir_path.exists():
+        raise FileNotFoundError(f"Directory does not exist: {dir_path}")
+    try:
+        if no_backup:
+            # Move to a temporary directory instead of permanent removal
+            with tempfile.TemporaryDirectory() as tmpdir:
+                tmp_path = Path(tmpdir) / dir_path.name
+                shutil.move(str(dir_path), str(tmp_path))
+                logger.info(f"Directory moved to temporary location: {tmp_path}")
+                # The directory will be automatically removed when exiting the context manager
+            return None
+        else:
+            # Create a backup directory name with timestamp
+            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+            backup_dir = dir_path.with_name(f"{dir_path.name}_backup_{timestamp}")
+
+            # Move the directory to the backup location
+            shutil.move(str(dir_path), str(backup_dir))
+            logger.info(f"Directory backed up to: {backup_dir}")
+            return backup_dir
+
+    except Exception as e:
+        logger.error(f"An error occurred: {e}")
+        return None
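Usage of the new helper, based on the signature above (paths are illustrative):

    from pathlib import Path

    from linkml_store.utils.file_utils import safe_remove_directory

    # default: the directory is moved to a timestamped sibling backup
    backup = safe_remove_directory(Path("/tmp/my_store"))
    print(backup)  # e.g. /tmp/my_store_backup_20240601_120000

    # no_backup=True: moved into a temp dir that is deleted on exit; returns None
    safe_remove_directory(Path("/tmp/scratch"), no_backup=True)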
linkml_store/utils/format_utils.py CHANGED
@@ -7,8 +7,10 @@ from pathlib import Path
 from typing import Any, Dict, List, Optional, TextIO, Type, Union
 
 import pandas as pd
+import pystow
 import yaml
 from pydantic import BaseModel
+from tabulate import tabulate
 
 
 class Format(Enum):
@@ -21,12 +23,40 @@
     YAML = "yaml"
     TSV = "tsv"
     CSV = "csv"
+    PYTHON = "python"
     PARQUET = "parquet"
     FORMATTED = "formatted"
+    TABLE = "table"
+
+
+def load_objects_from_url(
+    url: str,
+    format: Union[Format, str] = None,
+    expected_type: Type = None,
+    local_path: Optional[str] = None,
+    **kwargs,
+) -> List[Dict[str, Any]]:
+    """
+    Load objects from a URL in JSON, JSONLines, YAML, CSV, or TSV format.
+
+    :param url: The URL to the file.
+    :param format: The format of the file. Can be a Format enum or a string value.
+    :param expected_type: The target type to load the objects into.
+    :param local_path: The local path to save the file to.
+    :return: A list of dictionaries representing the loaded objects.
+    """
+    local_path = pystow.ensure("linkml", "linkml-store", url=url)
+    objs = load_objects(local_path, format=format, expected_type=expected_type, **kwargs)
+    if not objs:
+        raise ValueError(f"No objects loaded from URL: {url}")
+    return objs
 
 
 def load_objects(
-    file_path: Union[str, Path], format: Union[Format, str] = None, expected_type: Type = None
+    file_path: Union[str, Path],
+    format: Union[Format, str] = None,
+    expected_type: Type = None,
+    header_comment_token: Optional[str] = None,
 ) -> List[Dict[str, Any]]:
     """
     Load objects from a file in JSON, JSONLines, YAML, CSV, or TSV format.
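A sketch of the new URL loader; the URL is illustrative, and the download is cached via pystow (under the "linkml"/"linkml-store" module) before being parsed:

    from linkml_store.utils.format_utils import load_objects_from_url

    objs = load_objects_from_url("https://example.org/data/genes.tsv", format="tsv")
    print(len(objs), objs[0])

Note that the local_path argument is immediately overwritten by the pystow.ensure call, so the cached location currently cannot be overridden.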
@@ -37,7 +67,7 @@ def load_objects(
 
     :param file_path: The path to the file.
     :param format: The format of the file. Can be a Format enum or a string value.
-    :param expected_type: The target type to load the objects into.
+    :param expected_type: The target type to load the objects into, e.g. list
     :return: A list of dictionaries representing the loaded objects.
     """
     if isinstance(format, str):
@@ -48,6 +78,12 @@
 
     if not format and (file_path.endswith(".parquet") or file_path.endswith(".pq")):
         format = Format.PARQUET
+    if not format and file_path.endswith(".tsv"):
+        format = Format.TSV
+    if not format and file_path.endswith(".csv"):
+        format = Format.CSV
+    if not format and file_path.endswith(".py"):
+        format = Format.PYTHON
 
     mode = "r"
     if format == Format.PARQUET:
@@ -64,15 +100,33 @@
     elif format == Format.JSONL or (not format and file_path.endswith(".jsonl")):
         objs = [json.loads(line) for line in f]
     elif format == Format.YAML or (not format and (file_path.endswith(".yaml") or file_path.endswith(".yml"))):
-        if expected_type and expected_type == list:
+        if expected_type and expected_type == list:  # noqa E721
             objs = list(yaml.safe_load_all(f))
         else:
             objs = yaml.safe_load(f)
-    elif format == Format.TSV or (not format and file_path.endswith(".tsv")):
-        reader = csv.DictReader(f, delimiter="\t")
-        objs = list(reader)
-    elif format == Format.CSV or (not format and file_path.endswith(".csv")):
-        reader = csv.DictReader(f)
+    elif format == Format.TSV or format == Format.CSV:
+        # Skip initial comment lines if comment_char is set
+        if header_comment_token:
+            # Store the original position
+            original_pos = f.tell()
+
+            # Read and store lines until we find a non-comment line
+            lines = []
+            for line in f:
+                if not line.startswith(header_comment_token):
+                    break
+                lines.append(line)
+
+            # Go back to the original position
+            f.seek(original_pos)
+
+            # Skip the comment lines we found
+            for _ in lines:
+                f.readline()
+        if format == Format.TSV:
+            reader = csv.DictReader(f, delimiter="\t")
+        else:
+            reader = csv.DictReader(f)
         objs = list(reader)
     elif format == Format.PARQUET:
         import pyarrow.parquet as pq
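The new header_comment_token parameter lets TSV/CSV files carry comment lines above the header. A sketch, writing a small file first (contents are illustrative):

    from pathlib import Path

    from linkml_store.utils.format_utils import load_objects

    Path("demo.tsv").write_text("# generated by a pipeline\n# source: demo\nid\tname\n1\tgeneA\n")
    objs = load_objects("demo.tsv", header_comment_token="#")
    print(objs)  # [{'id': '1', 'name': 'geneA'}]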
@@ -151,6 +205,9 @@
     if isinstance(data, pd.DataFrame):
         data = data.to_dict(orient="records")
 
+    if isinstance(data, dict) and format in [Format.TSV, Format.CSV]:
+        data = [data]
+
     if isinstance(data, BaseModel):
         data = data.model_dump()
 
@@ -158,6 +215,10 @@
         return json.dumps(data, indent=2, default=str)
     elif format == Format.JSONL:
         return "\n".join(json.dumps(obj) for obj in data)
+    elif format == Format.PYTHON:
+        return str(data)
+    elif format == Format.TABLE:
+        return tabulate(pd.DataFrame(data), headers="keys", tablefmt="psql")
     elif format == Format.YAML:
         if isinstance(data, list):
             return yaml.safe_dump_all(data, sort_keys=False)
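A sketch of the new TABLE rendering, passing the Format enum directly (row contents are illustrative; the CLI passes string values, which presumably get normalized upstream):

    from linkml_store.utils.format_utils import Format, render_output

    rows = [{"name": "gene1", "count": 4}, {"name": "gene2", "count": 7}]
    print(render_output(rows, Format.TABLE))  # psql-style grid via tabulate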
linkml_store/utils/pandas_utils.py ADDED
@@ -0,0 +1,40 @@
+from typing import Dict, List, Tuple, Union
+
+import pandas as pd
+
+
+def facet_summary_to_dataframe_unmelted(
+    facet_summary: Dict[Union[str, Tuple[str, ...]], List[Tuple[Union[str, Tuple[str, ...]], int]]]
+) -> pd.DataFrame:
+    rows = []
+
+    for facet_type, facet_data in facet_summary.items():
+        if isinstance(facet_type, str):
+            # Single facet type
+            for category, value in facet_data:
+                rows.append({facet_type: category, "Value": value})
+        else:
+            # Multiple facet types
+            for cat_val_tuple in facet_data:
+                if len(cat_val_tuple) == 2:
+                    categories, value = cat_val_tuple
+                else:
+                    categories, value = cat_val_tuple[:-1], cat_val_tuple[-1]
+                row = {"Value": value}
+                for i, facet in enumerate(facet_type):
+                    row[facet] = categories[i]
+                rows.append(row)
+
+    df = pd.DataFrame(rows)
+
+    # Ensure all columns are present, fill with None if missing
+    all_columns = set(col for facet in facet_summary.keys() for col in (facet if isinstance(facet, tuple) else [facet]))
+    for col in all_columns:
+        if col not in df.columns:
+            df[col] = None
+
+    # Move 'Value' to the end
+    cols = [col for col in df.columns if col != "Value"] + ["Value"]
+    df = df[cols]
+
+    return df
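A worked example of the unmelting, using a facet summary of the shape the function expects (values are illustrative):

    from linkml_store.utils.pandas_utils import facet_summary_to_dataframe_unmelted

    facet_summary = {
        "species": [("human", 10), ("mouse", 5)],
        ("species", "tissue"): [("human", "liver", 7), ("mouse", "brain", 3)],
    }
    df = facet_summary_to_dataframe_unmelted(facet_summary)
    print(df)
    #   species tissue  Value
    # 0   human    NaN     10
    # 1   mouse    NaN      5
    # 2   human  liver      7
    # 3   mouse  brain      3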
linkml_store/utils/schema_utils.py ADDED
@@ -0,0 +1,23 @@
+from typing import List
+
+from linkml_runtime import SchemaView
+from linkml_runtime.linkml_model import SlotDefinition
+
+
+def path_to_attribute_list(class_name: str, path: str, schema_view: SchemaView) -> List[SlotDefinition]:
+    """
+    Convert a path to a list of attributes.
+
+    :param path:
+    :return:
+    """
+    parts = path.split(".")
+    att_list = []
+    while parts:
+        part = parts.pop(0)
+        att = schema_view.induced_slot(part, class_name)
+        if not att:
+            raise ValueError(f"Attribute {part} not found in class {class_name}")
+        att_list.append(att)
+        class_name = att.range
+    return att_list
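A sketch of walking a dotted path through a schema; the toy schema is illustrative, and this assumes SchemaView accepts the schema as a YAML string (which recent linkml-runtime versions support):

    from linkml_runtime import SchemaView

    from linkml_store.utils.schema_utils import path_to_attribute_list

    schema_yaml = """
    id: https://example.org/demo
    name: demo
    prefixes:
      linkml: https://w3id.org/linkml/
    imports:
      - linkml:types
    default_range: string
    classes:
      Person:
        attributes:
          address:
            range: Address
      Address:
        attributes:
          city:
    """
    sv = SchemaView(schema_yaml)
    slots = path_to_attribute_list("Person", "address.city", sv)
    print([s.name for s in slots])  # ['address', 'city']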
linkml_store/utils/sql_utils.py CHANGED
@@ -19,8 +19,9 @@ TYPE_MAP = {
 
 OP_MAP = {
     "eq": "=",
+    "$in": "ARRAY_CONTAINS",  ## mongodb
+    "$contains": "ARRAY_CONTAINS",  ## TODO: this is chromadb-specific
     "in": "ARRAY_CONTAINS",
-    "$contains": "ARRAY_CONTAINS",
 }
 
 
linkml_store/webapi/__init__.py ADDED
File without changes (new empty file)
linkml_store/webapi/html/__init__.py ADDED
@@ -0,0 +1,3 @@
+from pathlib import Path
+
+HTML_TEMPLATES_DIR = Path(__file__).parent
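HTML_TEMPLATES_DIR gives downstream code (presumably the new webapi/main.py) a stable handle on the template directory. A sketch of typical use with Jinja2, not necessarily how main.py actually wires it up:

    from jinja2 import Environment, FileSystemLoader

    from linkml_store.webapi.html import HTML_TEMPLATES_DIR

    env = Environment(loader=FileSystemLoader(HTML_TEMPLATES_DIR))
    template = env.get_template("databases.html.j2")
    print(template.render(response={"data": {"databases": [{"name": "demo"}]}}))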
linkml_store/webapi/html/base.html.j2 ADDED
@@ -0,0 +1,24 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>{% block title %}LinkML Store API{% endblock %}</title>
+    <style>
+        body { font-family: Arial, sans-serif; line-height: 1.6; padding: 20px; }
+        h1 { color: #333; }
+        a { color: #0066cc; }
+        .navigation { margin-bottom: 20px; }
+        .content { margin-top: 20px; }
+    </style>
+</head>
+<body>
+    <div class="navigation">
+        <a href="/pages/">Home</a> |
+        <a href="/pages/databases">Databases</a>
+    </div>
+    <div class="content">
+        {% block content %}{% endblock %}
+    </div>
+</body>
+</html>
linkml_store/webapi/html/collection_details.html.j2 ADDED
@@ -0,0 +1,15 @@
+{% extends "base.html.j2" %}
+
+{% block content %}
+<h1>{{ response.meta.title }}</h1>
+<p>Name: {{ params.collection_name }}</p>
+
+<h2>Collections</h2>
+<ul>
+    {% for collection in response.data.collections %}
+    <li>
+        <a href="/pages{{ collection.links|selectattr('rel', 'equalto', 'self')|first|attr('href') }}">{{ collection.name }}</a>
+    </li>
+    {% endfor %}
+</ul>
+{% endblock %}
linkml_store/webapi/html/database_details.html.j2 ADDED
@@ -0,0 +1,16 @@
+{% extends "base.html.j2" %}
+
+{% block content %}
+<h1>{{ response.meta.title }}</h1>
+<p>Handle: {{ response.data.handle }}</p>
+<p>Number of collections: {{ response.data.num_collections }}</p>
+
+<h2>Collections</h2>
+<ul>
+    {% for collection in response.data.collections %}
+    <li>
+        <a href="/pages{{ collection.links|selectattr('rel', 'equalto', 'self')|first|attr('href') }}">{{ collection.name }}</a>
+    </li>
+    {% endfor %}
+</ul>
+{% endblock %}
linkml_store/webapi/html/databases.html.j2 ADDED
@@ -0,0 +1,14 @@
+{% extends "base.html.j2" %}
+
+{% block title %}LinkML Store API - Databases{% endblock %}
+
+{% block content %}
+<h1>Databases</h1>
+<ul>
+    {% for db in response.data.databases %}
+    <li>
+        <a href="/pages/databases/{{ db.name }}">{{ db.name }}</a>
+    </li>
+    {% endfor %}
+</ul>
+{% endblock %}
linkml_store/webapi/html/generic.html.j2 ADDED
@@ -0,0 +1,46 @@
+{% extends "base.html.j2" %}
+
+{% block title %}{meta.path}{% endblock %}
+
+{% block content %}
+<h1>Meta</h1>
+<pre>
+{{ response.meta }}
+</pre>
+
+<h1>Links</h1>
+<ul>
+    {% for link in response.links %}
+    <li>
+        <a href="/pages{{ link.href }}">{{ link.rel }} ({{ link.href }})</a>
+    </li>
+    {% endfor %}
+</ul>
+</ul>
+
+<h1>Data</h1>
+{% if data_html %}
+<ul>
+    {% for e in data_html %}
+    <li>{{ e|safe }}</li>
+    {% endfor %}
+</ul>
+{% else %}
+
+{% if "items" in response.data %}
+<ul>
+    {% for item in response.data['items'] %}
+    <li>
+        {{ item.name }}
+        {% for link in item.links %}
+        <a href="/pages{{ link.href }}">{{ link.rel }}</a>
+        {% endfor %}
+    </li>
+    {% endfor %}
+</ul>
+{% endif %}
+<pre>
+{{ response.data }}
+</pre>
+{% endif %}
+{% endblock %}