PyPI - cognee - Versions diffs - 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl - Mend

cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (223) hide show

cognee/tests/test_graph_visualization_permissions.py ADDED Viewed

@@ -0,0 +1,161 @@
+import asyncio
+import os
+import pathlib
+import pytest
+import pytest_asyncio
+from httpx import ASGITransport, AsyncClient
+import cognee
+from cognee.api.client import app
+from cognee.modules.users.methods import create_user, get_default_user
+from cognee.modules.users.permissions.methods import authorized_give_permission_on_datasets
+# Use pytest-asyncio to handle all async tests: assigning the asyncio marker to
+# the module-level `pytestmark` applies it to every test in this file.
+pytestmark = pytest.mark.asyncio
+@pytest.fixture(scope="module")
+def event_loop():
+    """Create an instance of the default event loop for our test module."""
+    policy = asyncio.get_event_loop_policy()
+    loop = policy.new_event_loop()
+    yield loop
+    loop.close()
+@pytest_asyncio.fixture(scope="module")
+async def client():
+    """Create an async HTTP client for testing"""
+    transport = ASGITransport(app=app)
+    async with AsyncClient(transport=transport, base_url="http://test") as client:
+        yield client
+@pytest_asyncio.fixture(scope="module")
+async def setup_environment():
+    """
+    Set up a clean environment for the test, creating necessary users and datasets.
+    This fixture runs once before all tests and cleans up afterwards.
+    """
+    # 1. Enable permissions feature
+    os.environ["ENABLE_BACKEND_ACCESS_CONTROL"] = "True"
+    # 2. Set up an independent test directory
+    base_dir = pathlib.Path(__file__).parent
+    cognee.config.data_root_directory(str(base_dir / ".data_storage/test_graph_viz"))
+    cognee.config.system_root_directory(str(base_dir / ".cognee_system/test_graph_viz"))
+    # 3. Clean up old data
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+    # 4. Add document for default user
+    explanation_file_path = os.path.join(base_dir, "test_data/Natural_language_processing.txt")
+    await cognee.add([explanation_file_path], dataset_name="NLP")
+    default_user = await get_default_user()
+    nlp_cognify_result = await cognee.cognify(["NLP"], user=default_user)
+    def extract_dataset_id_from_cognify(cognify_result):
+        """Extract dataset_id from cognify output dictionary"""
+        for dataset_id, pipeline_result in cognify_result.items():
+            return dataset_id
+        return None
+    dataset_id = extract_dataset_id_from_cognify(nlp_cognify_result)
+    yield dataset_id
+    # 5. Clean up data after tests are finished
+    await cognee.prune.prune_data()
+    await cognee.prune.prune_system(metadata=True)
+async def get_authentication_headers(client: AsyncClient, email: str, password: str) -> dict:
+    """Authenticates and returns the Authorization header."""
+    login_data = {"username": email, "password": password}
+    response = await client.post("/api/v1/auth/login", data=login_data, timeout=15)
+    assert response.status_code == 200, "Failed to log in and get token"
+    token_data = response.json()
+    access_token = token_data["access_token"]
+    return {"Authorization": f"Bearer {access_token}"}
+async def test_owner_can_access_graph(client: AsyncClient, setup_environment: int):
+    """
+    Test Case 1: The dataset owner should be able to access the graph data successfully.
+    """
+    dataset_id = setup_environment
+    default_user_email = "default_user@example.com"
+    default_user_password = "default_password"
+    response = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, default_user_email, default_user_password),
+    )
+    assert response.status_code == 200, (
+        f"Owner failed to get the knowledge graph visualization. Response: {response.json()}"
+    )
+    data = response.json()
+    assert len(data) > 1, "The graph data is not valid."
+    print("✅ Owner can access the graph visualization successfully.")
+async def test_granting_permission_enables_access(client: AsyncClient, setup_environment: int):
+    """
+    Test Case 2: A user without any permissions should be denied access (404 Not Found).
+    After granting permission, the user should be able to access the graph data.
+    """
+    dataset_id = setup_environment
+    # Create a user without any permissions to the dataset
+    test_user_email = "test_user@example.com"
+    test_user_password = "test_password"
+    test_user = await create_user(test_user_email, test_user_password)
+    # Test the access to graph visualization for the test user without any permissions
+    response = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, test_user_email, test_user_password),
+    )
+    assert response.status_code == 403, (
+        "Access to graph visualization should be denied without READ permission."
+    )
+    assert (
+        response.json()["detail"]
+        == "Request owner does not have necessary permission: [read] for all datasets requested. [PermissionDeniedError]"
+    )
+    print("✅ Access to graph visualization should be denied without READ permission.")
+    # Grant permission to the test user
+    default_user = await get_default_user()
+    await authorized_give_permission_on_datasets(
+        test_user.id, [dataset_id], "read", default_user.id
+    )
+    # Test the access to graph visualization for the test user
+    response_for_test_user = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, test_user_email, test_user_password),
+    )
+    assert response_for_test_user.status_code == 200, (
+        "Access to graph visualization should succeed for user with been granted read permission"
+    )
+    print(
+        "✅ Access to graph visualization should succeed for user with been granted read permission"
+    )
+    # Test the graph data is the same for the test user and the default user
+    default_user_email = "default_user@example.com"
+    default_user_password = "default_password"
+    response_for_default_user = await client.get(
+        f"/api/v1/datasets/{dataset_id}/graph",
+        headers=await get_authentication_headers(client, default_user_email, default_user_password),
+    )
+    assert response_for_test_user.json() == response_for_default_user.json(), (
+        "The graph data for the test user and the default user is not the same."
+    )
+    print("✅ The graph data for the test user and the default user is the same.")

cognee/tests/test_neptune_analytics_graph.py ADDED Viewed

@@ -0,0 +1,309 @@
+import os
+from dotenv import load_dotenv
+import asyncio
+from cognee.infrastructure.databases.graph.neptune_driver import NeptuneGraphDB
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.modules.engine.models import Entity, EntityType
+from cognee.modules.data.processing.document_types import TextDocument
+# Set up Amazon credentials in .env file and get the values from environment variables
+load_dotenv()
+# Graph identifier read from GRAPH_ID; falls back to an empty string when the variable is unset.
+graph_id = os.getenv("GRAPH_ID", "")
+# Adapter instance shared by every coroutine in this file; note it is constructed at import time.
+na_adapter = NeptuneGraphDB(graph_id)
+def setup():
+    # Define nodes data before the main function
+    # These nodes were defined using openAI from the following prompt:
+    # Neptune Analytics is an ideal choice for investigatory, exploratory, or data-science workloads
+    #     that require fast iteration for data, analytical and algorithmic processing, or vector search on graph data. It
+    #     complements Amazon Neptune Database, a popular managed graph database. To perform intensive analysis, you can load
+    #     the data from a Neptune Database graph or snapshot into Neptune Analytics. You can also load graph data that's
+    #     stored in Amazon S3.
+    document = TextDocument(
+        name="text_test.txt",
+        raw_data_location="git/cognee/examples/database_examples/data_storage/data/text_test.txt",
+        external_metadata="{}",
+        mime_type="text/plain",
+    )
+    document_chunk = DocumentChunk(
+        text="Neptune Analytics is an ideal choice for investigatory, exploratory, or data-science workloads \n    that require fast iteration for data, analytical and algorithmic processing, or vector search on graph data. It \n    complements Amazon Neptune Database, a popular managed graph database. To perform intensive analysis, you can load \n    the data from a Neptune Database graph or snapshot into Neptune Analytics. You can also load graph data that's \n    stored in Amazon S3.\n    ",
+        chunk_size=187,
+        chunk_index=0,
+        cut_type="paragraph_end",
+        is_part_of=document,
+    )
+    graph_database = EntityType(name="graph database", description="graph database")
+    neptune_analytics_entity = Entity(
+        name="neptune analytics",
+        description="A memory-optimized graph database engine for analytics that processes large amounts of graph data quickly.",
+    )
+    neptune_database_entity = Entity(
+        name="amazon neptune database",
+        description="A popular managed graph database that complements Neptune Analytics.",
+    )
+    storage = EntityType(name="storage", description="storage")
+    storage_entity = Entity(
+        name="amazon s3",
+        description="A storage service provided by Amazon Web Services that allows storing graph data.",
+    )
+    nodes_data = [
+        document,
+        document_chunk,
+        graph_database,
+        neptune_analytics_entity,
+        neptune_database_entity,
+        storage,
+        storage_entity,
+    ]
+    edges_data = [
+        (
+            str(document_chunk.id),
+            str(storage_entity.id),
+            "contains",
+        ),
+        (
+            str(storage_entity.id),
+            str(storage.id),
+            "is_a",
+        ),
+        (
+            str(document_chunk.id),
+            str(neptune_database_entity.id),
+            "contains",
+        ),
+        (
+            str(neptune_database_entity.id),
+            str(graph_database.id),
+            "is_a",
+        ),
+        (
+            str(document_chunk.id),
+            str(document.id),
+            "is_part_of",
+        ),
+        (
+            str(document_chunk.id),
+            str(neptune_analytics_entity.id),
+            "contains",
+        ),
+        (
+            str(neptune_analytics_entity.id),
+            str(graph_database.id),
+            "is_a",
+        ),
+    ]
+    return nodes_data, edges_data
+async def pipeline_method():
+    """
+    Example script using the neptune analytics with small sample data
+    This example demonstrates how to add nodes to Neptune Analytics
+    """
+    print("------TRUNCATE GRAPH-------")
+    await na_adapter.delete_graph()
+    print("------SETUP DATA-------")
+    nodes, edges = setup()
+    print("------ADD NODES-------")
+    await na_adapter.add_node(nodes[0])
+    await na_adapter.add_nodes(nodes[1:])
+    print("------GET NODES FROM DATA-------")
+    node_ids = [str(node.id) for node in nodes]
+    db_nodes = await na_adapter.get_nodes(node_ids)
+    print("------RESULTS:-------")
+    for n in db_nodes:
+        print(n)
+    print("------ADD EDGES-------")
+    await na_adapter.add_edge(edges[0][0], edges[0][1], edges[0][2])
+    await na_adapter.add_edges(edges[1:])
+    print("------HAS EDGES-------")
+    has_edge = await na_adapter.has_edge(
+        edges[0][0],
+        edges[0][1],
+        edges[0][2],
+    )
+    if has_edge:
+        print(f"found edge ({edges[0][0]})-[{edges[0][2]}]->({edges[0][1]})")
+    has_edges = await na_adapter.has_edges(edges)
+    if len(has_edges) > 0:
+        print(f"found edges: {len(has_edges)} (expected: {len(edges)})")
+    else:
+        print(f"no edges found (expected: {len(edges)})")
+    print("------GET GRAPH-------")
+    all_nodes, all_edges = await na_adapter.get_graph_data()
+    print(f"found {len(all_nodes)} nodes and found {len(all_edges)} edges")
+    print("------NEIGHBORING NODES-------")
+    center_node = nodes[2]
+    neighbors = await na_adapter.get_neighbors(str(center_node.id))
+    print(f'found {len(neighbors)} neighbors for node "{center_node.name}"')
+    for neighbor in neighbors:
+        print(neighbor)
+    print("------NEIGHBORING EDGES-------")
+    center_node = nodes[2]
+    neighbouring_edges = await na_adapter.get_edges(str(center_node.id))
+    print(f'found {len(neighbouring_edges)} edges neighbouring node "{center_node.name}"')
+    for edge in neighbouring_edges:
+        print(edge)
+    print("------GET CONNECTIONS (SOURCE NODE)-------")
+    document_chunk_node = nodes[0]
+    connections = await na_adapter.get_connections(str(document_chunk_node.id))
+    print(f'found {len(connections)} connections for node "{document_chunk_node.type}"')
+    for connection in connections:
+        src, relationship, tgt = connection
+        src = src.get("name", src.get("type", "unknown"))
+        relationship = relationship["relationship_name"]
+        tgt = tgt.get("name", tgt.get("type", "unknown"))
+        print(f'"{src}"-[{relationship}]->"{tgt}"')
+    print("------GET CONNECTIONS (TARGET NODE)-------")
+    connections = await na_adapter.get_connections(str(center_node.id))
+    print(f'found {len(connections)} connections for node "{center_node.name}"')
+    for connection in connections:
+        src, relationship, tgt = connection
+        src = src.get("name", src.get("type", "unknown"))
+        relationship = relationship["relationship_name"]
+        tgt = tgt.get("name", tgt.get("type", "unknown"))
+        print(f'"{src}"-[{relationship}]->"{tgt}"')
+    print("------SUBGRAPH-------")
+    node_names = ["neptune analytics", "amazon neptune database"]
+    subgraph_nodes, subgraph_edges = await na_adapter.get_nodeset_subgraph(Entity, node_names)
+    print(
+        f"found {len(subgraph_nodes)} nodes and  {len(subgraph_edges)} edges in the subgraph around {node_names}"
+    )
+    for subgraph_node in subgraph_nodes:
+        print(subgraph_node)
+    for subgraph_edge in subgraph_edges:
+        print(subgraph_edge)
+    print("------STAT-------")
+    stat = await na_adapter.get_graph_metrics(include_optional=True)
+    assert type(stat) is dict
+    assert stat["num_nodes"] == 7
+    assert stat["num_edges"] == 7
+    assert stat["mean_degree"] == 2.0
+    assert round(stat["edge_density"], 3) == 0.167
+    assert stat["num_connected_components"] == [7]
+    assert stat["sizes_of_connected_components"] == 1
+    assert stat["num_selfloops"] == 0
+    # Unsupported optional metrics
+    assert stat["diameter"] == -1
+    assert stat["avg_shortest_path_length"] == -1
+    assert stat["avg_clustering"] == -1
+    print("------DELETE-------")
+    # delete all nodes and edges:
+    await na_adapter.delete_graph()
+    # delete all nodes by node id
+    # node_ids = [str(node.id) for node in nodes]
+    # await na_adapter.delete_nodes(node_ids)
+    has_edges = await na_adapter.has_edges(edges)
+    if len(has_edges) == 0:
+        print("Delete successful")
+    else:
+        print("Delete failed")
+async def misc_methods():
+    """
+    Exercise the remaining adapter methods against the sample graph.
+    Reloads the sample data into an emptied graph, prints the output of the label,
+    filtering and subgraph queries, and asserts that removing predecessor / successor
+    connections leaves the corresponding lookups empty.
+    Raises:
+        AssertionError: If disconnected nodes are reported, or if a predecessor /
+            successor lookup is not non-empty before removal and empty after it.
+    """
+    print("------TRUNCATE GRAPH-------")
+    await na_adapter.delete_graph()
+    print("------SETUP TEST ENV-------")
+    nodes, edges = setup()
+    await na_adapter.add_nodes(nodes)
+    await na_adapter.add_edges(edges)
+    print("------GET GRAPH-------")
+    all_nodes, all_edges = await na_adapter.get_graph_data()
+    print(f"found {len(all_nodes)} nodes and found {len(all_edges)} edges")
+    print("------GET DISCONNECTED-------")
+    nodes_disconnected = await na_adapter.get_disconnected_nodes()
+    print(nodes_disconnected)
+    # Every sample node takes part in at least one edge, so none should be reported.
+    assert len(nodes_disconnected) == 0
+    print("------Get Labels (Node)-------")
+    node_labels = await na_adapter.get_node_labels_string()
+    print(node_labels)
+    print("------Get Labels (Edge)-------")
+    edge_labels = await na_adapter.get_relationship_labels_string()
+    print(edge_labels)
+    print("------Get Filtered Graph-------")
+    # "text_test.txt" is the name given to the sample document in setup().
+    filtered_nodes, filtered_edges = await na_adapter.get_filtered_graph_data(
+        [{"name": ["text_test.txt"]}]
+    )
+    print(filtered_nodes, filtered_edges)
+    print("------Get Degree one nodes-------")
+    degree_one_nodes = await na_adapter.get_degree_one_nodes("EntityType")
+    print(degree_one_nodes)
+    print("------Get Doc sub-graph-------")
+    # NOTE(review): "test.txt" does not match the sample document name "text_test.txt";
+    # the result is only printed, so confirm whether an empty result is intended.
+    doc_sub_graph = await na_adapter.get_document_subgraph("test.txt")
+    print(doc_sub_graph)
+    print("------Fetch and Remove connections (Predecessors)-------")
+    # Fetch test edge
+    (src_id, dest_id, relationship) = edges[0]
+    nodes_predecessors = await na_adapter.get_predecessors(node_id=dest_id, edge_label=relationship)
+    assert len(nodes_predecessors) > 0
+    # NOTE(review): the lookup is keyed by dest_id while the removal is keyed by src_id;
+    # presumably this matches the adapter's direction convention - confirm.
+    await na_adapter.remove_connection_to_predecessors_of(
+        node_ids=[src_id], edge_label=relationship
+    )
+    nodes_predecessors_after = await na_adapter.get_predecessors(
+        node_id=dest_id, edge_label=relationship
+    )
+    # Return empty after relationship being deleted.
+    assert len(nodes_predecessors_after) == 0
+    print("------Fetch and Remove connections (Successors)-------")
+    # Re-read the edges from the graph: the sample edge used above has just been removed.
+    _, edges_suc = await na_adapter.get_graph_data()
+    # Edges returned by get_graph_data are unpacked as 4-tuples; the last item is ignored here.
+    (src_id, dest_id, relationship, _) = edges_suc[0]
+    nodes_successors = await na_adapter.get_successors(node_id=src_id, edge_label=relationship)
+    assert len(nodes_successors) > 0
+    await na_adapter.remove_connection_to_successors_of(node_ids=[dest_id], edge_label=relationship)
+    nodes_successors_after = await na_adapter.get_successors(
+        node_id=src_id, edge_label=relationship
+    )
+    assert len(nodes_successors_after) == 0
+    # no-op (per the original note; the calls are made only to check they run, results are ignored)
+    await na_adapter.project_entire_graph()
+    await na_adapter.drop_graph()
+    await na_adapter.graph_exists()
+    pass
+if __name__ == "__main__":
+    # Run the two walkthroughs one after the other; each asyncio.run call uses its own event loop.
+    asyncio.run(pipeline_method())
+    asyncio.run(misc_methods())

cognee/tests/test_neptune_analytics_hybrid.py ADDED Viewed

@@ -0,0 +1,176 @@
+import os
+from dotenv import load_dotenv
+import asyncio
+import pytest
+from cognee.modules.chunking.models import DocumentChunk
+from cognee.modules.engine.models import Entity, EntityType
+from cognee.modules.data.processing.document_types import TextDocument
+from cognee.infrastructure.databases.vector.embeddings import get_embedding_engine
+from cognee.shared.logging_utils import get_logger
+from cognee.infrastructure.databases.hybrid.neptune_analytics.NeptuneAnalyticsAdapter import (
+    NeptuneAnalyticsAdapter,
+)
+# Set up Amazon credentials in .env file and get the values from environment variables
+load_dotenv()
+# Graph identifier read from GRAPH_ID; falls back to an empty string when the variable is unset.
+graph_id = os.getenv("GRAPH_ID", "")
+# get the default embedder
+embedding_engine = get_embedding_engine()
+# Two adapters over the same graph id: one built without an embedding engine (used for the
+# graph calls below) and one built with it (used for the vector calls below).
+na_graph = NeptuneAnalyticsAdapter(graph_id)
+na_vector = NeptuneAnalyticsAdapter(graph_id, embedding_engine)
+# Collection name passed to every vector call in this file.
+collection = "test_collection"
+logger = get_logger("test_neptune_analytics_hybrid")
+def setup_data():
+    # Define nodes data before the main function
+    # These nodes were defined using openAI from the following prompt:
+    #
+    # Neptune Analytics is an ideal choice for investigatory, exploratory, or data-science workloads
+    #     that require fast iteration for data, analytical and algorithmic processing, or vector search on graph data. It
+    #     complements Amazon Neptune Database, a popular managed graph database. To perform intensive analysis, you can load
+    #     the data from a Neptune Database graph or snapshot into Neptune Analytics. You can also load graph data that's
+    #     stored in Amazon S3.
+    document = TextDocument(
+        name="text.txt",
+        raw_data_location="git/cognee/examples/database_examples/data_storage/data/text.txt",
+        external_metadata="{}",
+        mime_type="text/plain",
+    )
+    document_chunk = DocumentChunk(
+        text="Neptune Analytics is an ideal choice for investigatory, exploratory, or data-science workloads \n    that require fast iteration for data, analytical and algorithmic processing, or vector search on graph data. It \n    complements Amazon Neptune Database, a popular managed graph database. To perform intensive analysis, you can load \n    the data from a Neptune Database graph or snapshot into Neptune Analytics. You can also load graph data that's \n    stored in Amazon S3.\n    ",
+        chunk_size=187,
+        chunk_index=0,
+        cut_type="paragraph_end",
+        is_part_of=document,
+    )
+    graph_database = EntityType(name="graph database", description="graph database")
+    neptune_analytics_entity = Entity(
+        name="neptune analytics",
+        description="A memory-optimized graph database engine for analytics that processes large amounts of graph data quickly.",
+    )
+    neptune_database_entity = Entity(
+        name="amazon neptune database",
+        description="A popular managed graph database that complements Neptune Analytics.",
+    )
+    storage = EntityType(name="storage", description="storage")
+    storage_entity = Entity(
+        name="amazon s3",
+        description="A storage service provided by Amazon Web Services that allows storing graph data.",
+    )
+    nodes_data = [
+        document,
+        document_chunk,
+        graph_database,
+        neptune_analytics_entity,
+        neptune_database_entity,
+        storage,
+        storage_entity,
+    ]
+    edges_data = [
+        (
+            str(document_chunk.id),
+            str(storage_entity.id),
+            "contains",
+        ),
+        (
+            str(storage_entity.id),
+            str(storage.id),
+            "is_a",
+        ),
+        (
+            str(document_chunk.id),
+            str(neptune_database_entity.id),
+            "contains",
+        ),
+        (
+            str(neptune_database_entity.id),
+            str(graph_database.id),
+            "is_a",
+        ),
+        (
+            str(document_chunk.id),
+            str(document.id),
+            "is_part_of",
+        ),
+        (
+            str(document_chunk.id),
+            str(neptune_analytics_entity.id),
+            "contains",
+        ),
+        (
+            str(neptune_analytics_entity.id),
+            str(graph_database.id),
+            "is_a",
+        ),
+    ]
+    return nodes_data, edges_data
+async def test_add_graph_then_vector_data():
+    logger.info("------test_add_graph_then_vector_data-------")
+    (nodes, edges) = setup_data()
+    await na_graph.add_nodes(nodes)
+    await na_graph.add_edges(edges)
+    await na_vector.create_data_points(collection, nodes)
+    node_ids = [str(node.id) for node in nodes]
+    retrieved_data_points = await na_vector.retrieve(collection, node_ids)
+    retrieved_nodes = await na_graph.get_nodes(node_ids)
+    assert len(retrieved_data_points) == len(retrieved_nodes) == len(node_ids)
+    # delete all nodes and edges and vectors:
+    await na_graph.delete_graph()
+    await na_vector.prune()
+    (nodes, edges) = await na_graph.get_graph_data()
+    assert len(nodes) == 0
+    assert len(edges) == 0
+    logger.info("------PASSED-------")
+async def test_add_vector_then_node_data():
+    logger.info("------test_add_vector_then_node_data-------")
+    (nodes, edges) = setup_data()
+    await na_vector.create_data_points(collection, nodes)
+    await na_graph.add_nodes(nodes)
+    await na_graph.add_edges(edges)
+    node_ids = [str(node.id) for node in nodes]
+    retrieved_data_points = await na_vector.retrieve(collection, node_ids)
+    retrieved_nodes = await na_graph.get_nodes(node_ids)
+    assert len(retrieved_data_points) == len(retrieved_nodes) == len(node_ids)
+    # delete all nodes and edges and vectors:
+    await na_vector.prune()
+    await na_graph.delete_graph()
+    (nodes, edges) = await na_graph.get_graph_data()
+    assert len(nodes) == 0
+    assert len(edges) == 0
+    logger.info("------PASSED-------")
+def main():
+    """
+    Example script uses neptune analytics for the graph and vector (hybrid) store with small sample data
+    This example demonstrates how to add nodes and vectors to Neptune Analytics, and ensures that
+    the nodes do not conflict
+    """
+    asyncio.run(test_add_graph_then_vector_data())
+    asyncio.run(test_add_vector_then_node_data())
+if __name__ == "__main__":
+    main()

cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl

cognee 0.2.1.dev7__py3-none-any.whl → 0.2.2.dev1__py3-none-any.whl