linkml-store 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. linkml_store/__init__.py +7 -0
  2. linkml_store/api/__init__.py +8 -0
  3. linkml_store/api/client.py +414 -0
  4. linkml_store/api/collection.py +1280 -0
  5. linkml_store/api/config.py +187 -0
  6. linkml_store/api/database.py +862 -0
  7. linkml_store/api/queries.py +69 -0
  8. linkml_store/api/stores/__init__.py +0 -0
  9. linkml_store/api/stores/chromadb/__init__.py +7 -0
  10. linkml_store/api/stores/chromadb/chromadb_collection.py +121 -0
  11. linkml_store/api/stores/chromadb/chromadb_database.py +89 -0
  12. linkml_store/api/stores/dremio/__init__.py +10 -0
  13. linkml_store/api/stores/dremio/dremio_collection.py +555 -0
  14. linkml_store/api/stores/dremio/dremio_database.py +1052 -0
  15. linkml_store/api/stores/dremio/mappings.py +105 -0
  16. linkml_store/api/stores/dremio_rest/__init__.py +11 -0
  17. linkml_store/api/stores/dremio_rest/dremio_rest_collection.py +502 -0
  18. linkml_store/api/stores/dremio_rest/dremio_rest_database.py +1023 -0
  19. linkml_store/api/stores/duckdb/__init__.py +16 -0
  20. linkml_store/api/stores/duckdb/duckdb_collection.py +339 -0
  21. linkml_store/api/stores/duckdb/duckdb_database.py +283 -0
  22. linkml_store/api/stores/duckdb/mappings.py +8 -0
  23. linkml_store/api/stores/filesystem/__init__.py +15 -0
  24. linkml_store/api/stores/filesystem/filesystem_collection.py +186 -0
  25. linkml_store/api/stores/filesystem/filesystem_database.py +81 -0
  26. linkml_store/api/stores/hdf5/__init__.py +7 -0
  27. linkml_store/api/stores/hdf5/hdf5_collection.py +104 -0
  28. linkml_store/api/stores/hdf5/hdf5_database.py +79 -0
  29. linkml_store/api/stores/ibis/__init__.py +5 -0
  30. linkml_store/api/stores/ibis/ibis_collection.py +488 -0
  31. linkml_store/api/stores/ibis/ibis_database.py +328 -0
  32. linkml_store/api/stores/mongodb/__init__.py +25 -0
  33. linkml_store/api/stores/mongodb/mongodb_collection.py +379 -0
  34. linkml_store/api/stores/mongodb/mongodb_database.py +114 -0
  35. linkml_store/api/stores/neo4j/__init__.py +0 -0
  36. linkml_store/api/stores/neo4j/neo4j_collection.py +429 -0
  37. linkml_store/api/stores/neo4j/neo4j_database.py +154 -0
  38. linkml_store/api/stores/solr/__init__.py +3 -0
  39. linkml_store/api/stores/solr/solr_collection.py +224 -0
  40. linkml_store/api/stores/solr/solr_database.py +83 -0
  41. linkml_store/api/stores/solr/solr_utils.py +0 -0
  42. linkml_store/api/types.py +4 -0
  43. linkml_store/cli.py +1147 -0
  44. linkml_store/constants.py +7 -0
  45. linkml_store/graphs/__init__.py +0 -0
  46. linkml_store/graphs/graph_map.py +24 -0
  47. linkml_store/index/__init__.py +53 -0
  48. linkml_store/index/implementations/__init__.py +0 -0
  49. linkml_store/index/implementations/llm_indexer.py +174 -0
  50. linkml_store/index/implementations/simple_indexer.py +43 -0
  51. linkml_store/index/indexer.py +211 -0
  52. linkml_store/inference/__init__.py +13 -0
  53. linkml_store/inference/evaluation.py +195 -0
  54. linkml_store/inference/implementations/__init__.py +0 -0
  55. linkml_store/inference/implementations/llm_inference_engine.py +154 -0
  56. linkml_store/inference/implementations/rag_inference_engine.py +276 -0
  57. linkml_store/inference/implementations/rule_based_inference_engine.py +169 -0
  58. linkml_store/inference/implementations/sklearn_inference_engine.py +314 -0
  59. linkml_store/inference/inference_config.py +66 -0
  60. linkml_store/inference/inference_engine.py +209 -0
  61. linkml_store/inference/inference_engine_registry.py +74 -0
  62. linkml_store/plotting/__init__.py +5 -0
  63. linkml_store/plotting/cli.py +826 -0
  64. linkml_store/plotting/dimensionality_reduction.py +453 -0
  65. linkml_store/plotting/embedding_plot.py +489 -0
  66. linkml_store/plotting/facet_chart.py +73 -0
  67. linkml_store/plotting/heatmap.py +383 -0
  68. linkml_store/utils/__init__.py +0 -0
  69. linkml_store/utils/change_utils.py +17 -0
  70. linkml_store/utils/dat_parser.py +95 -0
  71. linkml_store/utils/embedding_matcher.py +424 -0
  72. linkml_store/utils/embedding_utils.py +299 -0
  73. linkml_store/utils/enrichment_analyzer.py +217 -0
  74. linkml_store/utils/file_utils.py +37 -0
  75. linkml_store/utils/format_utils.py +550 -0
  76. linkml_store/utils/io.py +38 -0
  77. linkml_store/utils/llm_utils.py +122 -0
  78. linkml_store/utils/mongodb_utils.py +145 -0
  79. linkml_store/utils/neo4j_utils.py +42 -0
  80. linkml_store/utils/object_utils.py +190 -0
  81. linkml_store/utils/pandas_utils.py +93 -0
  82. linkml_store/utils/patch_utils.py +126 -0
  83. linkml_store/utils/query_utils.py +89 -0
  84. linkml_store/utils/schema_utils.py +23 -0
  85. linkml_store/utils/sklearn_utils.py +193 -0
  86. linkml_store/utils/sql_utils.py +177 -0
  87. linkml_store/utils/stats_utils.py +53 -0
  88. linkml_store/utils/vector_utils.py +158 -0
  89. linkml_store/webapi/__init__.py +0 -0
  90. linkml_store/webapi/html/__init__.py +3 -0
  91. linkml_store/webapi/html/base.html.j2 +24 -0
  92. linkml_store/webapi/html/collection_details.html.j2 +15 -0
  93. linkml_store/webapi/html/database_details.html.j2 +16 -0
  94. linkml_store/webapi/html/databases.html.j2 +14 -0
  95. linkml_store/webapi/html/generic.html.j2 +43 -0
  96. linkml_store/webapi/main.py +855 -0
  97. linkml_store-0.3.0.dist-info/METADATA +226 -0
  98. linkml_store-0.3.0.dist-info/RECORD +101 -0
  99. linkml_store-0.3.0.dist-info/WHEEL +4 -0
  100. linkml_store-0.3.0.dist-info/entry_points.txt +3 -0
  101. linkml_store-0.3.0.dist-info/licenses/LICENSE +22 -0
@@ -0,0 +1,145 @@
1
+ import logging
2
+ import os
3
+ import subprocess
4
+ from pathlib import Path
5
+ from typing import Optional
6
+ from urllib.parse import urlparse
7
+
8
+ from pymongo import MongoClient
9
+ from pymongo.database import Database
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def extract_connection_info(db: Database):
    """
    Summarize connection details for a pymongo Database.

    :param db: the pymongo Database to inspect
    :return: dict with ``host``, ``port``, ``db_name`` and ``username``
        (``username`` is None when no credentials are attached)
    """
    mongo_client = db.client

    # client.address is None for replica sets / sharded clusters without a
    # single resolved server; fall back to the class-level defaults.
    address = mongo_client.address
    if address:
        host, port = address
    else:
        host, port = mongo_client.HOST, mongo_client.PORT

    username = None
    if hasattr(mongo_client, "options") and hasattr(mongo_client.options, "credentials"):
        creds = mongo_client.options.credentials
        if creds:
            username = creds.username

    return {"host": host, "port": port, "db_name": db.name, "username": username}
37
+
38
+
39
def get_connection_string(client: MongoClient):
    """
    Extract a ``host:port`` connection string from the MongoClient.
    This avoids triggering truth value testing on Database objects.

    :param client: the client to describe
    :return: ``host:port`` for a single resolved server, a comma-separated
        list for replica sets, or a string parsed from the default URI
    :raises ValueError: if no connection information can be determined
    """
    # Single resolved server (the common case).
    # A second, identical `hasattr(client, "address") and client.address`
    # check used to follow this one; it was unreachable and has been removed.
    if client.address:
        host, port = client.address
        return f"{host}:{port}"
    # Replica sets: report all known hosts.
    # NOTE(review): recent pymongo exposes `nodes` rather than `hosts` --
    # confirm `hosts` exists on the client version in use.
    if client.hosts:
        return ",".join(f"{host}:{port}" for host, port in client.hosts)
    # Last resort: parse the class-level default host string as a URI.
    if hasattr(client, "HOST"):
        parsed_uri = urlparse(client.HOST)
        return f"{parsed_uri.hostname}:{parsed_uri.port}"
    raise ValueError("Unable to determine connection string from client")
59
+
60
+
61
def get_connection_info(db: Database):
    """
    Extract connection information from the Database object.

    :param db: the pymongo Database to inspect
    :return: tuple of (host, port, db_name)
    :raises ValueError: when the client reports no known nodes
    """
    db_name = db.name

    # client.nodes covers single nodes and replica sets, but in pymongo it
    # is a frozenset and cannot be indexed -- take an arbitrary element with
    # next(iter(...)) instead of node_list[0], which raised TypeError.
    node_list = db.client.nodes
    if not node_list:
        raise ValueError("Unable to determine connection information from database")

    host, port = next(iter(node_list))
    return host, port, db_name
79
+
80
+
81
def get_auth_from_client(client: MongoClient):
    """
    Extract authentication details from a MongoClient.

    :param client: the client to inspect
    :return: (username, password, source) tuple, or (None, None, None)
        when no option/credential information is available
    """
    # The options object moved between PyMongo releases; probe both spots.
    if hasattr(client, "_MongoClient__options"):
        opts = client._MongoClient__options  # older PyMongo
    elif hasattr(client, "options"):
        opts = client.options  # newer PyMongo
    else:
        return None, None, None

    if hasattr(opts, "credentials"):
        creds = opts.credentials
        return creds.username, creds.password, creds.source
    return None, None, None
96
+
97
+
98
def connection_from_handle(handle: str):
    """
    Split a ``mongodb://host/dbname`` handle into (host, db_name).

    :param handle: connection handle, e.g. ``mongodb://localhost:27017/mydb``
    :return: tuple of (host, db_name)
    :raises ValueError: if the handle does not use the mongodb:// scheme or
        does not contain exactly one ``/`` separating host and database
    """
    prefix = "mongodb://"
    if not handle.startswith(prefix):
        # Previously this fell through and implicitly returned None, which
        # surfaced as an opaque unpacking TypeError in callers; fail loudly.
        raise ValueError(f"Not a mongodb:// handle: {handle}")
    parts = handle[len(prefix):].split("/")
    if len(parts) != 2:
        raise ValueError(f"Expected mongodb://<host>/<db>, got: {handle}")
    host, db = parts
    return host, db
103
+
104
+
105
def export_mongodb(handle: str, location: str, password: Optional[str] = None):
    """
    Dump a MongoDB database to ``location`` using the ``mongodump`` CLI.

    :param handle: connection handle (``mongodb://host/dbname``)
    :param location: output directory passed to ``mongodump --out``
    :param password: accepted but currently unused -- TODO confirm whether
        it should be forwarded to mongodump
    :raises subprocess.CalledProcessError: if mongodump exits non-zero
    """
    host, db_name = connection_from_handle(handle)

    logger.info(f"Exporting MongoDB database {db_name} from {host} to {location}")
    cmd = ["mongodump", f"--host={host}", f"--db={db_name}", "--out", location]
    result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    logger.info(f"MongoDB export completed successfully. Output: {result.stdout}")
114
+
115
+
116
def import_mongodb(handle: str, dump_dir: str, drop: bool = False):
    """
    Restore a MongoDB database from a ``mongodump`` directory via ``mongorestore``.

    The dump directory must contain exactly one database sub-directory; its
    contents are restored into the database named in ``handle`` (namespaces
    are rewritten via --nsFrom/--nsTo).

    :param handle: connection handle (``mongodb://host/dbname``)
    :param dump_dir: directory produced by mongodump
    :param drop: if True, pass --drop so target collections are dropped first
    :raises ValueError: if dump_dir is not a directory or does not contain
        exactly one database sub-directory
    :raises subprocess.CalledProcessError: if mongorestore exits non-zero
    """
    host, db_name = connection_from_handle(handle)

    dir_path = Path(dump_dir)
    if not dir_path.is_dir():
        raise ValueError(f"{dir_path} is not a dir")
    # Only sub-directories can be database dumps; the previous os.listdir
    # call also picked up stray files (e.g. logs), breaking the count below.
    directories = [entry.name for entry in dir_path.iterdir() if entry.is_dir()]
    if len(directories) != 1:
        raise ValueError(f"Expected exactly one database in {dump_dir}, got: {directories}")
    src_db_name = directories[0]

    # Construct the mongorestore command
    cmd = [
        "mongorestore",
        f"--host={host}",
        f"--nsFrom={src_db_name}.*",
        f"--nsTo={db_name}.*",
        str(dump_dir),
    ]
    if drop:
        cmd.append("--drop")
    logger.info(f"CMD={cmd}")
    # Execute mongorestore
    result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    if result.stderr:
        logger.warning(result.stderr)
    logger.info(f"MongoDB import completed successfully. Output: {result.stdout} // {result.stderr}")
@@ -0,0 +1,42 @@
1
+ import networkx as nx
2
+ from py2neo import Graph
3
+
4
+
5
def draw_neo4j_graph(handle="bolt://localhost:7687", auth=("neo4j", None)):
    """
    Fetch up to 100 relationships from a Neo4j instance and draw them with
    networkx.

    Nodes are keyed by their ``name`` property and labelled with their first
    Neo4j label (``-`` when unlabelled); edges carry the relationship type.

    :param handle: bolt URL of the Neo4j server
    :param auth: (user, password) tuple for the connection
    """
    neo_graph = Graph(handle, auth=auth)

    cypher = """
    MATCH (n)-[r]->(m)
    RETURN n, r, m
    LIMIT 100
    """
    result = neo_graph.run(cypher)

    # Build a directed graph keyed on node "name" properties.
    g = nx.DiGraph()
    for rec in result:
        src = rec["n"]
        dst = rec["m"]
        rel = rec["r"]
        g.add_node(src["name"], label=list(src.labels or ["-"])[0])
        g.add_node(dst["name"], label=list(dst.labels or ["-"])[0])
        g.add_edge(src["name"], dst["name"], type=type(rel).__name__)

    layout = nx.spring_layout(g)

    nx.draw_networkx_nodes(g, layout, node_color="lightblue", node_size=10000)
    nx.draw_networkx_edges(g, layout, edge_color="gray", arrows=True)

    # Node labels include both the name and the Neo4j label.
    name_labels = nx.get_node_attributes(g, "label")
    nx.draw_networkx_labels(
        g, layout, {node: f"{node}\n({lbl})" for node, lbl in name_labels.items()}, font_size=16
    )

    # Edge labels show the relationship type.
    nx.draw_networkx_edge_labels(g, layout, nx.get_edge_attributes(g, "type"), font_size=16)
@@ -0,0 +1,190 @@
1
+ import json
2
+ from copy import deepcopy
3
+ from typing import Any, Dict, List, Optional, Union
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
def object_path_update(
    obj: Union[BaseModel, Dict[str, Any]], path: str, value: Any
) -> Union[BaseModel, Dict[str, Any]]:
    """
    Return a copy of a nested object updated at the given path. The path to the
    desired field is given in dot and bracket notation (e.g., 'a[0].b.c[1]').

    Intermediate dicts/lists are created as needed; lists are padded with
    empty dicts up to the requested index.

    :param obj: the object to update (a dict, or a pydantic BaseModel)
    :type obj: Union[BaseModel, Dict[str, Any]]
    :param path: the path string indicating where to place the value
    :type path: str
    :param value: the value to be set at the specified path
    :type value: Any
    :return: a new object of the same type with the update applied; the
        input is deep-copied first and never modified
    :rtype: Union[BaseModel, Dict[str, Any]]

    **Example**::

        >>> data = {}
        >>> object_path_update(data, 'persons[0].foo.bar', 1)
        {'persons': [{'foo': {'bar': 1}}]}
    """
    if isinstance(obj, BaseModel):
        # Round-trip through a plain dict, then rebuild the model type.
        typ = type(obj)
        obj = obj.model_dump(exclude_none=True)
        obj = object_path_update(obj, path, value)
        return typ(**obj)
    obj = deepcopy(obj)
    ret_obj = obj
    parts = path.split(".")
    for part in parts[:-1]:
        if "[" in part:
            key, index = part[:-1].split("[")
            index = int(index)
            obj = obj.setdefault(key, [])
            # Pad the list so the indexed element exists.
            while len(obj) <= index:
                obj.append({})
            obj = obj[index]
        else:
            # Replace an explicit None so setdefault can insert a dict.
            if part in obj and obj[part] is None:
                del obj[part]
            obj = obj.setdefault(part, {})
    last_part = parts[-1]
    if "[" in last_part:
        key, index = last_part[:-1].split("[")
        index = int(index)
        if key not in obj or not isinstance(obj[key], list):
            obj[key] = [{} for _ in range(index + 1)]
        # Pad an existing-but-too-short list instead of raising IndexError.
        while len(obj[key]) <= index:
            obj[key].append({})
        obj[key][index] = value
    else:
        obj[last_part] = value
    return ret_obj
61
+
62
+
63
def object_path_get(obj: Union[BaseModel, Dict[str, Any]], path: str, default_value=None) -> Any:
    """
    Retrieves a value from a nested object based on a path description. The path to the
    desired field is given in dot and bracket notation (e.g., 'a[0].b.c[1]').

    When a dotted segment is applied to a list, the segment is mapped over
    the list elements and a list of values is returned.

    :param obj: the object to read from (a dict, or a pydantic BaseModel)
    :type obj: Union[BaseModel, Dict[str, Any]]
    :param path: the path string indicating which field to retrieve
    :type path: str
    :param default_value: value returned when the path is not present
    :return: the value at the specified path, or ``default_value``
    :rtype: Any

    **Example**::

        >>> data = {'persons': [{'foo': {'bar': 1}}]}
        >>> object_path_get(data, 'persons[0].foo.bar')
        1
        >>> object_path_get(data, 'persons[0].foo')
        {'bar': 1}
        >>> object_path_get({}, 'not there', "NA")
        'NA'
    """
    if isinstance(obj, BaseModel):
        obj = obj.model_dump()
    parts = path.split(".")
    for part in parts:
        if "[" in part:
            key, index = part[:-1].split("[")
            index = int(index)
            if key in obj and obj[key] is not None:
                obj = obj[key][index]
            else:
                return default_value
        else:
            if isinstance(obj, list):
                # Fan the lookup out over the list elements.
                obj = [v1.get(part, default_value) for v1 in obj]
            else:
                obj = obj.get(part, default_value)
    return obj
102
+
103
+
104
def parse_update_expression(expr: str) -> Union[tuple[str, Any], None]:
    """
    Parse an expression of the form 'path.to.field=value' into (path, value).

    The value portion is decoded as JSON.

    :param expr: expression string to parse
    :return: (path, value) tuple, or None when the expression contains no
        '=' or the value part is not valid JSON
    """
    if "=" not in expr:
        return None
    path, raw = expr.split("=", 1)
    try:
        return path, json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a subclass of ValueError.
        return None
117
+
118
+
119
def clean_empties(value: Union[Dict, List]) -> Any:
    """
    Recursively strip None values from nested dicts and lists.

    Note: despite the name, empty containers are preserved; only entries
    whose (cleaned) value is None are dropped. Non-container inputs are
    returned unchanged.
    """
    if isinstance(value, dict):
        cleaned = ((key, clean_empties(val)) for key, val in value.items())
        return {key: val for key, val in cleaned if val is not None}
    if isinstance(value, list):
        return [item for item in map(clean_empties, value) if item is not None]
    return value
125
+
126
+
127
def select_nested(data: dict, paths: List[Union[str, List[str]]], current_path=None) -> Optional[dict]:
    """
    Select nested attributes from a complex dictionary based on selector strings.

    Args:
        data (dict): The input nested dictionary.
        paths (list): A list of selector strings (dot-separated), or
            pre-split path lists (used internally during recursion).
        current_path: path of ``data`` within the root object (internal).

    Returns:
        dict: A new dictionary with the same structure, but only the selected attributes.

    Raises:
        ValueError: if ``paths`` is empty.

    Example:
        >>> data = {
        ...     "person": {
        ...         "name": "John Doe",
        ...         "age": 30,
        ...         "address": {
        ...             "street": "123 Main St",
        ...             "city": "Anytown",
        ...             "country": "USA"
        ...         },
        ...         "phones": [
        ...             {"type": "home", "number": "555-1234"},
        ...             {"type": "work", "number": "555-5678"}
        ...         ]
        ...     },
        ...     "company": {
        ...         "name": "Acme Inc",
        ...         "location": "New York"
        ...     }
        ... }
        >>> select_nested(data, ["person.address.street", "person.address.city"])
        {'person': {'address': {'street': '123 Main St', 'city': 'Anytown'}}}
        >>> select_nested(data, ["person.phones.number", "person.phones.type"])
        {'person': {'phones': [{'type': 'home', 'number': '555-1234'}, {'type': 'work', 'number': '555-5678'}]}}
        >>> select_nested(data, ["person.phones.type"])
        {'person': {'phones': [{'type': 'home'}, {'type': 'work'}]}}
    """
    if current_path is None:
        current_path = []
    if not paths:
        raise ValueError("No paths provided")
    # Keep only the selectors that could still match at or below this point.
    matching_paths = []
    for path in paths:
        if isinstance(path, str):
            path = path.split(".")
        if path == current_path:
            # Exact match: include this whole subtree.
            return data
        if path[: len(current_path)] == current_path:
            matching_paths.append(path)
    if not matching_paths:
        return None
    if isinstance(data, dict):
        selected = {
            key: select_nested(val, matching_paths, current_path + [key])
            for key, val in data.items()
        }
        return {key: val for key, val in selected.items() if val is not None}
    if isinstance(data, list):
        # List items inherit the current path: selectors address fields of
        # the elements, not list indices. (Previously used an unused
        # enumerate index and a pointless `current_path + []` copy.)
        selected = [select_nested(item, matching_paths, current_path) for item in data]
        return [item for item in selected if item is not None]
    return data
@@ -0,0 +1,93 @@
1
+ import logging
2
+ from typing import Any, Dict, List, Tuple, Union
3
+
4
+ import pandas as pd
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
+
9
def flatten_dict(d: Dict[str, Any], parent_key: str = "", sep: str = ".") -> Dict[str, Any]:
    """
    Recursively flatten a nested dictionary.

    Args:
        d (Dict[str, Any]): The dictionary to flatten.
        parent_key (str): Prefix applied to every key (used in recursion).
        sep (str): Separator placed between nested key segments.

    Returns:
        Dict[str, Any]: A single-level dictionary with compound keys.

    >>> flatten_dict({'a': 1, 'b': {'c': 2, 'd': {'e': 3}}})
    {'a': 1, 'b.c': 2, 'b.d.e': 3}
    """
    flat: Dict[str, Any] = {}
    for key, val in d.items():
        compound = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(val, dict):
            flat.update(flatten_dict(val, compound, sep=sep))
        else:
            flat[compound] = val
    return flat
32
+
33
+
34
def nested_objects_to_dataframe(data: List[Dict[str, Any]]) -> pd.DataFrame:
    """
    Convert a list of nested objects to a flattened pandas DataFrame.

    Each record is flattened with :func:`flatten_dict`, so nested keys
    become dotted column names.

    Args:
        data (List[Dict[str, Any]]): A list of nested dictionaries.

    Returns:
        pd.DataFrame: A flattened DataFrame.

    >>> data = [
    ...     {"person": {"name": "Alice", "age": 30}, "job": {"title": "Engineer", "salary": 75000}},
    ...     {"person": {"name": "Bob", "age": 35}, "job": {"title": "Manager", "salary": 85000}}
    ... ]
    >>> df = nested_objects_to_dataframe(data)
    >>> df.columns.tolist()
    ['person.name', 'person.age', 'job.title', 'job.salary']
    >>> df['person.name'].tolist()
    ['Alice', 'Bob']
    """
    return pd.DataFrame([flatten_dict(record) for record in data])
56
+
57
+
58
def facet_summary_to_dataframe_unmelted(
    facet_summary: Dict[Union[str, Tuple[str, ...]], List[Tuple[Union[str, Tuple[str, ...]], int]]],
) -> pd.DataFrame:
    """
    Unmelt a facet summary into a wide DataFrame.

    Keys of ``facet_summary`` are either a single facet name or a tuple of
    facet names; values are (category, count) pairs. Each pair becomes one
    row, with one column per facet name plus a trailing "Value" column.
    """
    rows = []
    for facet_type, facet_data in facet_summary.items():
        if isinstance(facet_type, str):
            # Single facet: one category column per row.
            rows.extend({facet_type: category, "Value": count} for category, count in facet_data)
        else:
            # Compound facet: spread the category tuple across columns.
            for entry in facet_data:
                if len(entry) == 2:
                    categories, count = entry
                else:
                    # Flat tuple: everything but the trailing count is a category.
                    categories, count = entry[:-1], entry[-1]
                row = {"Value": count}
                for pos, facet_name in enumerate(facet_type):
                    logger.debug(f"FT={facet_type} i={pos} Facet: {facet_name}, categories: {categories}")
                    row[facet_name] = categories[pos] if len(categories) > pos else None
                rows.append(row)

    df = pd.DataFrame(rows)

    # Guarantee a column for every facet name, even if no row produced it.
    expected_columns = set(
        col for facet in facet_summary for col in (facet if isinstance(facet, tuple) else [facet])
    )
    for col in expected_columns:
        if col not in df.columns:
            df[col] = None

    # Keep "Value" as the last column.
    ordered = [col for col in df.columns if col != "Value"] + ["Value"]
    return df[ordered]
@@ -0,0 +1,126 @@
1
+ from typing import Any, Dict, List, Optional, TypedDict
2
+
3
+ import jsonpatch
4
+
5
+
6
+ class PatchDict(TypedDict):
7
+ op: str
8
+ path: str
9
+ value: Optional[Any]
10
+ _from: Optional[str]
11
+
12
+
13
def apply_patches(obj: Any, patches: List[PatchDict], primary_key: Optional[str] = None, in_place=False) -> Any:
    """
    Apply a set of patches to an object.

    If the object is a list, the primary key must be specified.

    >>> objs = [{'id': 'F1', 'name': 'Cheese'}, {'id': 'F2', 'name': 'Bread'}]
    >>> patches = [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}]
    >>> apply_patches(objs, patches, primary_key='id')
    [{'id': 'F1', 'name': 'Toast'}, {'id': 'F2', 'name': 'Bread'}]

    :param obj: object to patch
    :param patches: list of patches, conforming to the JSON Patch format
    :param primary_key: key to use as the primary key for the objects (if obj is a list)
    :param in_place: whether to apply the patches in place
    :return: the patched object
    """
    if isinstance(obj, dict):
        return jsonpatch.JsonPatch(patches).apply(obj, in_place=in_place)
    if isinstance(obj, list):
        if not primary_key:
            raise ValueError("Primary key must be specified for list objects")
        return apply_patches_to_list(obj, patches, primary_key, in_place=in_place)
    raise ValueError(f"Unsupported object type: {type(obj)}")
39
+
40
+
41
def apply_patches_to_list(
    objects: List[Dict[str, Any]], patches: List[PatchDict], primary_key: str, in_place=False
) -> List[Dict[str, Any]]:
    """
    Apply a set of patches to a list of objects.

    The objects are first keyed by ``primary_key`` (so patch paths begin
    with the key value, e.g. ``/F1/name``), patched, and then returned as
    a list again.

    :param objects: list of objects
    :param patches: list of patches, conforming to the JSON Patch format
    :param primary_key: key to use as the primary key for the objects
    :param in_place: whether to apply the patches in place
    :return: the patched objects
    """
    keyed = {item[primary_key]: item for item in objects}
    return list(apply_patches_to_keyed_list(keyed, patches, in_place=in_place).values())
58
+
59
+
60
def apply_patches_to_keyed_list(
    objs_as_dict: Dict[str, Dict[str, Any]], patches: List[PatchDict], in_place=False
) -> Dict[str, Dict[str, Any]]:
    """
    Apply a set of patches to objects keyed by their primary key.

    :param objs_as_dict: mapping of primary key -> object
    :param patches: list of patches, conforming to the JSON Patch format
    :param in_place: whether to apply the patches in place
    :return: mapping of primary key -> patched object
    """
    return jsonpatch.JsonPatch(patches).apply(objs_as_dict, in_place=in_place)
74
+
75
+
76
def patches_from_objects_lists(
    src_objs: List[Dict[str, Any]], dst_objs: List[Dict[str, Any]], primary_key: str, exclude_none=True
) -> List[PatchDict]:
    """
    Generate a set of patches to transform src_objs into tgt_objs.

    >>> src_objs = [{'id': 'F1', 'name': 'Cheese'}, {'id': 'F2', 'name': 'Bread'}]
    >>> tgt_objs = [{'id': 'F1', 'name': 'Toast'}, {'id': 'F2', 'name': 'Bread'}]
    >>> patches_from_objects_lists(src_objs, tgt_objs, primary_key='id')
    [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}]

    by default exclude_none is True, so None values are excluded from the patch

    >>> tgt_objs = [{'id': 'F1', 'name': 'Toast'}, {'id': 'F2', 'name': None}]
    >>> patches_from_objects_lists(src_objs, tgt_objs, primary_key='id')
    [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}, {'op': 'remove', 'path': '/F2/name'}]

    if exclude_none is False, None values are treated as being set to None

    >>> patches_from_objects_lists(src_objs, tgt_objs, primary_key='id', exclude_none=False)
    [{'op': 'replace', 'path': '/F1/name', 'value': 'Toast'}, {'op': 'replace', 'path': '/F2/name', 'value': None}]

    See also: `<https://github.com/orgs/linkml/discussions/1975>`_

    Note the patches are sorted deterministically, first by path, then by operation.
    This helps ensure operations on the same object are grouped together

    :param src_objs: source objects
    :param dst_objs: target objects
    :param primary_key: key to use as the primary key for the objects
    :param exclude_none: whether to exclude None values from the patch
    :return: sorted list of JSON Patch operations
    """
    src_keyed = {item[primary_key]: item for item in src_objs}
    dst_keyed = {item[primary_key]: item for item in dst_objs}
    if exclude_none:
        src_keyed = {key: remove_nones(item) for key, item in src_keyed.items()}
        dst_keyed = {key: remove_nones(item) for key, item in dst_keyed.items()}
    diff = jsonpatch.JsonPatch.from_diff(src_keyed, dst_keyed)
    # Deterministic order: group by path first, then by operation.
    return sorted(diff.patch, key=lambda patch: (patch["path"], patch["op"]))
117
+
118
+
119
def remove_nones(obj: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a copy of ``obj`` without the keys whose value is None.

    :param obj: dictionary to filter
    :return: new dictionary containing only the non-None entries
    """
    return {key: val for key, val in obj.items() if val is not None}
@@ -0,0 +1,89 @@
1
+ import operator
2
+ from typing import Any, Callable, Dict
3
+
4
# Mapping from MongoDB comparison operators to Python predicates.
# $in/$nin follow Mongo semantics for array-valued fields: a scalar field
# value is treated as a one-element list and matches when any element is
# (not) contained in the operand list.
MONGO_OPERATORS = {
    "$eq": operator.eq,
    "$ne": operator.ne,
    "$gt": operator.gt,
    "$gte": operator.ge,
    "$lt": operator.lt,
    "$lte": operator.le,
    "$in": lambda a, b: any(x in b for x in (a if isinstance(a, list) else [a])),
    "$nin": lambda a, b: all(x not in b for x in (a if isinstance(a, list) else [a])),
}


def mongo_query_to_match_function(where: Dict[str, Any]) -> Callable[[Dict[str, Any]], bool]:
    """
    Convert a MongoDB-style query to a matching function.

    Nested sub-document conditions (e.g. ``{"nested": {"job": "engineer"}}``)
    are evaluated as a subset match against the sub-document at that key.
    (Previously they were incorrectly evaluated against the root object and
    could never match. Note this is still a subset match, not MongoDB's
    exact-document equality for this form.)

    >>> query = {"name": "foo", "age": {"$gt": 25}}
    >>> matcher = mongo_query_to_match_function(query)
    >>> matcher({"name": "foo", "age": 30})
    True
    >>> matcher({"name": "foo", "age": 20})
    False
    >>> matcher({"name": "bar", "age": 30})
    False

    >>> nested_query = {"nested.job": "engineer", "skills": {"$in": ["python", "mongodb"]}}
    >>> nested_matcher = mongo_query_to_match_function(nested_query)
    >>> nested_matcher({"nested": {"job": "engineer"}, "skills": ["python", "javascript"]})
    True
    >>> nested_matcher({"nested": {"job": "designer"}, "skills": ["python", "mongodb"]})
    False
    >>> nested_matcher({"nested": {"job": "engineer"}, "skills": ["java", "c++"]})
    False

    >>> complex_query = {"name": "foo", "age": {"$gte": 25, "$lt": 40}, "nested.salary": {"$gt": 50000}}
    >>> complex_matcher = mongo_query_to_match_function(complex_query)
    >>> complex_matcher({"name": "foo", "age": 30, "nested": {"salary": 60000}})
    True
    >>> complex_matcher({"name": "foo", "age": 45, "nested": {"salary": 70000}})
    False
    >>> complex_matcher({"name": "foo", "age": 35, "nested": {"salary": 40000}})
    False

    >>> invalid_query = {"age": {"$invalid": 25}}
    >>> invalid_matcher = mongo_query_to_match_function(invalid_query)
    >>> invalid_matcher({"age": 30})
    Traceback (most recent call last):
    ...
    ValueError: Unsupported operator: $invalid
    """
    if where is None:
        where = {}

    # Helpers are defined once at factory level (they were previously
    # recreated on every call of the returned matcher).

    def _get_nested_value(obj, key: str):
        # Walk a dotted path; any missing segment yields None.
        for part in key.split("."):
            if isinstance(obj, dict):
                obj = obj.get(part)
            else:
                return None
        return obj

    def _check_condition(obj, key: str, condition: Any) -> bool:
        value_at_key = _get_nested_value(obj, key)
        if isinstance(condition, dict) and any(k.startswith("$") for k in condition):
            # Operator expression, e.g. {"$gte": 25, "$lt": 40}: all must hold.
            for op, operand in condition.items():
                if op not in MONGO_OPERATORS:
                    raise ValueError(f"Unsupported operator: {op}")
                if not MONGO_OPERATORS[op](value_at_key, operand):
                    return False
            return True
        if isinstance(condition, dict):
            # Plain sub-document: every nested condition must hold against
            # the value found at `key` (the bug fix: this used to check the
            # root object instead of the nested one).
            return all(_check_condition(value_at_key, k, v) for k, v in condition.items())
        return value_at_key == condition

    def matches(obj: Dict[str, Any]) -> bool:
        return all(_check_condition(obj, k, v) for k, v in where.items())

    return matches