linkml-store 0.1.11__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: this release of linkml-store has been flagged as potentially problematic.

@@ -1,10 +1,14 @@
  import csv
+ import gzip
+ import io
  import json
+ import logging
  import sys
+ import tarfile
  from enum import Enum
  from io import StringIO
  from pathlib import Path
- from typing import Any, Dict, List, Optional, TextIO, Type, Union
+ from typing import IO, Any, Dict, List, Optional, TextIO, Type, Union

  import pandas as pd
  import pystow
@@ -12,6 +16,8 @@ import yaml
  from pydantic import BaseModel
  from tabulate import tabulate

+ logger = logging.getLogger(__name__)
+

  class Format(Enum):
      """
@@ -27,6 +33,35 @@ class Format(Enum):
      PARQUET = "parquet"
      FORMATTED = "formatted"
      TABLE = "table"
+     SQLDUMP_DUCKDB = "duckdb"
+     SQLDUMP_POSTGRES = "postgres"
+     DUMP_MONGODB = "mongodb"
+
+     @classmethod
+     def guess_format(cls, file_name: str) -> Optional["Format"]:
+         ext = Path(file_name).suffix.lower()
+
+         format_map = {
+             ".json": cls.JSON,
+             ".jsonl": cls.JSONL,
+             ".yaml": cls.YAML,
+             ".yml": cls.YAML,
+             ".tsv": cls.TSV,
+             ".csv": cls.CSV,
+             ".py": cls.PYTHON,
+             ".parquet": cls.PARQUET,
+             ".pq": cls.PARQUET,
+         }
+         fmt = format_map.get(ext, None)
+         if fmt is None:
+             if ext.startswith("."):
+                 ext = ext[1:]
+             if ext in [f.value for f in Format]:
+                 return Format(ext)
+         return fmt
+
+     def is_dump_format(self):
+         return self in [Format.SQLDUMP_DUCKDB, Format.SQLDUMP_POSTGRES, Format.DUMP_MONGODB]


  def load_objects_from_url(
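The new guess_format classmethod consults an explicit extension map first and then falls back to matching the bare extension against Format enum values, which is how dump formats such as "duckdb" resolve. A minimal sketch of the resulting behavior (illustrative only; the import path is assumed):

    from linkml_store.utils.format_utils import Format  # import path assumed

    Format.guess_format("data.yml")       # Format.YAML, via the extension map
    Format.guess_format("dump.duckdb")    # Format.SQLDUMP_DUCKDB, via the enum-value fallback
    Format.guess_format("data.unknown")   # None
    Format.DUMP_MONGODB.is_dump_format()  # True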
@@ -46,15 +81,109 @@ def load_objects_from_url(
      :return: A list of dictionaries representing the loaded objects.
      """
      local_path = pystow.ensure("linkml", "linkml-store", url=url)
+     logger.info(f"synced to {local_path}")
      objs = load_objects(local_path, format=format, expected_type=expected_type, **kwargs)
      if not objs:
          raise ValueError(f"No objects loaded from URL: {url}")
      return objs


+ def process_file(
+     f: IO, format: Format, expected_type: Optional[Type] = None, header_comment_token: Optional[str] = None
+ ) -> List[Dict[str, Any]]:
+     """
+     Process a single file and return a list of objects.
+     """
+     if format == Format.JSON:
+         objs = json.load(f)
+     elif format == Format.JSONL:
+         objs = [json.loads(line) for line in f]
+     elif format == Format.YAML:
+         if expected_type and expected_type == list:  # noqa E721
+             objs = list(yaml.safe_load_all(f))
+         else:
+             objs = yaml.safe_load(f)
+     elif format in [Format.TSV, Format.CSV]:
+         if header_comment_token:
+             while True:
+                 pos = f.tell()
+                 line = f.readline()
+                 if not line.startswith(header_comment_token):
+                     f.seek(pos)
+                     break
+         delimiter = "\t" if format == Format.TSV else ","
+         reader = csv.DictReader(f, delimiter=delimiter)
+         objs = list(reader)
+     elif format == Format.PARQUET:
+         import pyarrow.parquet as pq
+
+         table = pq.read_table(f)
+         objs = table.to_pandas().to_dict(orient="records")
+     elif format in [Format.PYTHON, Format.FORMATTED, Format.TABLE]:
+         raise ValueError(f"Format {format} is not supported for loading objects")
+     else:
+         raise ValueError(f"Unsupported file format: {format}")
+
+     if not isinstance(objs, list):
+         objs = [objs]
+     return objs
+
+
  def load_objects(
+     file_path: Union[str, Path],
+     format: Optional[Union[Format, str]] = None,
+     compression: Optional[str] = None,
+     expected_type: Optional[Type] = None,
+     header_comment_token: Optional[str] = None,
+ ) -> List[Dict[str, Any]]:
+     """
+     Load objects from a file or archive in supported formats.
+     For tgz archives, it processes all files and concatenates the results.
+
+     :param file_path: The path to the file or archive.
+     :param format: The format of the file. Can be a Format enum or a string value.
+     :param compression: The compression type. Supports 'gz' for gzip and 'tgz' for tar.gz.
+     :param expected_type: The target type to load the objects into, e.g. list
+     :param header_comment_token: Token used for header comments to be skipped
+     :return: A list of dictionaries representing the loaded objects.
+     """
+     if isinstance(file_path, Path):
+         file_path = str(file_path)
+
+     if isinstance(format, str):
+         format = Format(format)
+
+     all_objects = []
+
+     if compression == "tgz":
+         with tarfile.open(file_path, "r:gz") as tar:
+             for member in tar.getmembers():
+                 if member.isfile():
+                     f = tar.extractfile(member)
+                     if f:
+                         content = io.TextIOWrapper(f)
+                         member_format = Format.guess_format(member.name) if not format else format
+                         logger.debug(f"Processing tar member {member.name} with format {member_format}")
+                         all_objects.extend(process_file(content, member_format, expected_type, header_comment_token))
+     else:
+         if Path(file_path).is_dir():
+             raise ValueError(f"{file_path} is a dir, which is invalid for {format}")
+         mode = "rb" if format == Format.PARQUET or compression == "gz" else "r"
+         open_func = gzip.open if compression == "gz" else open
+         format = Format.guess_format(file_path) if not format else format
+         with open_func(file_path, mode) if file_path != "-" else sys.stdin as f:
+             if compression == "gz" and mode == "r":
+                 f = io.TextIOWrapper(f)
+             all_objects = process_file(f, format, expected_type, header_comment_token)

+     logger.debug(f"Loaded {len(all_objects)} objects from {file_path}")
+     return all_objects
+
+
+ def xxxload_objects(
      file_path: Union[str, Path],
      format: Union[Format, str] = None,
+     compression: Optional[str] = None,
      expected_type: Type = None,
      header_comment_token: Optional[str] = None,
  ) -> List[Dict[str, Any]]:
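The rewritten load_objects takes an explicit compression argument: "gz" opens a single gzip-wrapped file, while "tgz" walks every member of a tar.gz archive and concatenates the results, guessing each member's format when none is supplied. A hedged sketch of the call patterns (file names are hypothetical):

    objs = load_objects("data.csv")                        # format guessed from the extension
    objs = load_objects("data.json.gz", compression="gz")  # gzip-wrapped single file
    objs = load_objects("bundle.tgz", compression="tgz")   # every tar member loaded, results concatenated
    objs = load_objects("-", format="jsonl")               # stdin; format cannot be guessed from "-"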
@@ -172,7 +301,7 @@ def write_output(


  def render_output(
-     data: Union[List[Dict[str, Any]], Dict[str, Any], pd.DataFrame], format: Union[Format, str] = Format.YAML
+     data: Union[List[Dict[str, Any]], Dict[str, Any], pd.DataFrame], format: Optional[Union[Format, str]] = Format.YAML
  ) -> str:
      """
      Render output data in JSON, JSONLines, YAML, CSV, or TSV format.
@@ -271,15 +400,4 @@ def guess_format(path: str) -> Optional[Format]:
      :param path: The path to the file.
      :return: The guessed format.
      """
-     if path.endswith(".json"):
-         return Format.JSON
-     elif path.endswith(".jsonl"):
-         return Format.JSONL
-     elif path.endswith(".yaml") or path.endswith(".yml"):
-         return Format.YAML
-     elif path.endswith(".tsv"):
-         return Format.TSV
-     elif path.endswith(".csv"):
-         return Format.CSV
-     else:
-         return None
+     return Format.guess_format(path)
@@ -0,0 +1,145 @@
+ import logging
+ import os
+ import subprocess
+ from pathlib import Path
+ from typing import Optional
+ from urllib.parse import urlparse
+
+ from pymongo import MongoClient
+ from pymongo.database import Database
+
+ logger = logging.getLogger(__name__)
+
+
+ def extract_connection_info(db: Database):
+     client = db.client
+
+     # Get the host and port
+     host_info = client.address
+     if host_info:
+         host, port = host_info
+     else:
+         # For replica sets or sharded clusters, we might need to get this differently
+         host = client.HOST
+         port = client.PORT
+
+     # Get the database name
+     db_name = db.name
+
+     # Get username if available
+     username = None
+     if hasattr(client, "options") and hasattr(client.options, "credentials"):
+         credentials = client.options.credentials
+         if credentials:
+             username = credentials.username
+
+     return {"host": host, "port": port, "db_name": db_name, "username": username}
+
+
+ def get_connection_string(client: MongoClient):
+     """
+     Extract a connection string from the MongoClient.
+     This avoids triggering truth value testing on Database objects.
+     """
+     if client.address:
+         host, port = client.address
+         return f"{host}:{port}"
+     if hasattr(client, "address") and client.address:
+         host, port = client.address
+         return f"{host}:{port}"
+     elif client.hosts:
+         # For replica sets, return all hosts
+         return ",".join(f"{host}:{port}" for host, port in client.hosts)
+     elif hasattr(client, "HOST"):
+         # If we can't determine hosts, use the entire URI
+         parsed_uri = urlparse(client.HOST)
+         return f"{parsed_uri.hostname}:{parsed_uri.port}"
+     else:
+         raise ValueError("Unable to determine connection string from client")
+
+
+ def get_connection_info(db: Database):
+     """
+     Extract connection information from the Database object.
+     """
+     # Get the name of the database
+     db_name = db.name
+
+     # Get the client's node list (this should work for single nodes and replica sets)
+     node_list = db.client.nodes
+
+     if not node_list:
+         raise ValueError("Unable to determine connection information from database")
+
+     # Use the first node in the list (for single node setups, this will be the only node)
+     first_node = node_list[0]
+     host, port = first_node
+
+     return host, port, db_name
+
+
+ def get_auth_from_client(client: MongoClient):
+     """Extract authentication details from MongoClient."""
+     if hasattr(client, "_MongoClient__options"):
+         # For older versions of PyMongo
+         options = client._MongoClient__options
+     elif hasattr(client, "options"):
+         # For newer versions of PyMongo
+         options = client.options
+     else:
+         return None, None, None
+
+     if hasattr(options, "credentials"):
+         creds = options.credentials
+         return creds.username, creds.password, creds.source
+     return None, None, None
+
+
+ def connection_from_handle(handle: str):
+     if handle.startswith("mongodb://"):
+         handle = handle.replace("mongodb://", "")
+     host, db = handle.split("/")
+     return host, db
+
+
+ def export_mongodb(handle: str, location: str, password: Optional[str] = None):
+     host, db_name = connection_from_handle(handle)
+
+     # Construct the mongodump command
+     cmd = ["mongodump", f"--host={host}", f"--db={db_name}"]
+     logger.info(f"Exporting MongoDB database {db_name} from {host} to {location}")
+     cmd.extend(["--out", location])
+     result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+     logger.info(f"MongoDB export completed successfully. Output: {result.stdout}")
+
+
+ def import_mongodb(handle: str, dump_dir: str, drop: bool = False):
+     host, db_name = connection_from_handle(handle)
+
+     # list dirs in dump_dir
+     dir_path = Path(dump_dir)
+     if not dir_path.is_dir():
+         raise ValueError(f"{dir_path} is not a dir")
+     directories = [name for name in os.listdir(dump_dir)]
+     if len(directories) != 1:
+         raise ValueError(f"Expected exactly one database in {dump_dir}, got: {directories}")
+     src_db_name = directories[0]
+
+     # Construct the mongorestore command
+     cmd = [
+         "mongorestore",
+         f"--host={host}",
+         f"--nsFrom={src_db_name}.*",
+         f"--nsTo={db_name}.*",
+         str(dump_dir),
+     ]
+
+     # Add drop option if specified
+     if drop:
+         cmd.append("--drop")
+     logger.info(f"CMD={cmd}")
+     # Execute mongorestore
+     result = subprocess.run(cmd, check=True, capture_output=True, text=True)
+     if result.stderr:
+         logger.warning(result.stderr)
+     logger.info(f"MongoDB import completed successfully. Output: {result.stdout} // {result.stderr}")
@@ -0,0 +1,42 @@
+ import networkx as nx
+ from py2neo import Graph
+
+
+ def draw_neo4j_graph(handle="bolt://localhost:7687", auth=("neo4j", None)):
+     # Connect to Neo4j
+     graph = Graph(handle, auth=auth)
+
+     # Run a Cypher query
+     query = """
+     MATCH (n)-[r]->(m)
+     RETURN n, r, m
+     LIMIT 100
+     """
+     result = graph.run(query)
+
+     # Create a NetworkX graph
+     G = nx.DiGraph()  # Use DiGraph for directed edges
+     for record in result:
+         n = record["n"]
+         m = record["m"]
+         r = record["r"]
+         G.add_node(n["name"], label=list(n.labels or ["-"])[0])
+         G.add_node(m["name"], label=list(m.labels or ["-"])[0])
+         G.add_edge(n["name"], m["name"], type=type(r).__name__)
+
+     # Draw the graph
+     pos = nx.spring_layout(G)
+
+     # Draw nodes
+     nx.draw_networkx_nodes(G, pos, node_color="lightblue", node_size=10000)
+
+     # Draw edges
+     nx.draw_networkx_edges(G, pos, edge_color="gray", arrows=True)
+
+     # Add node labels
+     node_labels = nx.get_node_attributes(G, "label")
+     nx.draw_networkx_labels(G, pos, {node: f"{node}\n({label})" for node, label in node_labels.items()}, font_size=16)
+
+     # Add edge labels
+     edge_labels = nx.get_edge_attributes(G, "type")
+     nx.draw_networkx_edge_labels(G, pos, edge_labels, font_size=16)
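draw_neo4j_graph only issues NetworkX drawing calls; it never renders or saves a figure, so the caller is expected to drive matplotlib. A usage sketch (credentials are placeholders):

    import matplotlib.pyplot as plt

    draw_neo4j_graph("bolt://localhost:7687", auth=("neo4j", "secret"))
    plt.show()  # the nx.draw_* calls above draw onto the current matplotlib axes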
@@ -66,9 +66,14 @@ def col_val_constraints_to_conjs(col_name: str, val_constraints: Any) -> list:
          conjs = []
          for k, v in val_constraints.items():
              if k in OP_MAP:
-                 conjs.append(f"{OP_MAP[k]}({col_name}, {_quote(v)})")
+                 if k == "$in" and isinstance(v, list):
+                     v_mapped = [_quote(v1) for v1 in v]
+                     t = f"{col_name} IN ({', '.join(v_mapped)})"
+                 else:
+                     t = f"{OP_MAP[k]}({col_name}, {_quote(v)})"
              else:
-                 conjs.append(f"{col_name} {k} {_quote(v)}")
+                 t = f"{col_name} {k} {_quote(v)}"
+             conjs.append(t)
          return conjs
      else:
          return [f"{col_name} = {_quote(val_constraints)}"]
@@ -1,5 +1,13 @@
  {% extends "base.html.j2" %}

+
+ {% macro make_link(link) %}
+ {{ link.rel }} [
+ page: <a href="/pages{{ link.href }}">/pages{{ link.href }}</a> |
+ API: <a href="{{ link.href }}">{{ link.href }}</a> ]
+ <a href="{{ href }}">{{ rel }}</a>
+ {% endmacro %}
+
  {% block title %}{meta.path}{% endblock %}

  {% block content %}
@@ -9,38 +17,27 @@
  </pre>

  <h1>Links</h1>
- <ul>
- {% for link in response.links %}
- <li>
- <a href="/pages{{ link.href }}">{{ link.rel }} ({{ link.href }})</a>
- </li>
- {% endfor %}
+ <ul>
+ {% for link in response.links %}
+ <li> {{ make_link(link) }} </li>
+ {% endfor %}
  </ul>
- </ul>

+ {% if response.items != None and response["items"] != None %}
+ <h1>Items</h1>
+ <ul>
+ {% for item in response["items"] %}
+ <li>
+ {{ item.name }}
+ {% for link in item.links %}
+ {{ make_link(link) }}
+ {% endfor %}
+ HTML: {{ item.html | safe }}
+ </li>
+ {% endfor %}
+ {% endif %}
  <h1>Data</h1>
- {% if data_html %}
- <ul>
- {% for e in data_html %}
- <li>{{ e|safe }}</li>
- {% endfor %}
- </ul>
- {% else %}
-
- {% if "items" in response.data %}
- <ul>
- {% for item in response.data['items'] %}
- <li>
- {{ item.name }}
- {% for link in item.links %}
- <a href="/pages{{ link.href }}">{{ link.rel }}</a>
- {% endfor %}
- </li>
- {% endfor %}
- </ul>
- {% endif %}
  <pre>
  {{ response.data }}
  </pre>
- {% endif %}
  {% endblock %}