proscenium 0.0.8__tar.gz → 0.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {proscenium-0.0.8 → proscenium-0.0.11}/PKG-INFO +21 -29
  2. proscenium-0.0.11/pyproject.toml +38 -0
  3. proscenium-0.0.11/setup.cfg +4 -0
  4. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/patterns/rag.py +37 -2
  5. proscenium-0.0.11/src/proscenium.egg-info/PKG-INFO +40 -0
  6. proscenium-0.0.11/src/proscenium.egg-info/SOURCES.txt +31 -0
  7. proscenium-0.0.11/src/proscenium.egg-info/dependency_links.txt +1 -0
  8. proscenium-0.0.11/src/proscenium.egg-info/entry_points.txt +2 -0
  9. proscenium-0.0.11/src/proscenium.egg-info/requires.txt +15 -0
  10. proscenium-0.0.11/src/proscenium.egg-info/top_level.txt +1 -0
  11. proscenium-0.0.11/tests/test_demo_typer_help.py +32 -0
  12. proscenium-0.0.11/tests/test_display.py +7 -0
  13. proscenium-0.0.8/proscenium/patterns/chunk_space.py +0 -51
  14. proscenium-0.0.8/proscenium/patterns/document_enricher.py +0 -84
  15. proscenium-0.0.8/proscenium/patterns/entity_resolver.py +0 -95
  16. proscenium-0.0.8/proscenium/patterns/knowledge_graph.py +0 -41
  17. proscenium-0.0.8/proscenium/verbs/chunk.py +0 -42
  18. proscenium-0.0.8/proscenium/verbs/display/milvus.py +0 -68
  19. proscenium-0.0.8/proscenium/verbs/display/neo4j.py +0 -25
  20. proscenium-0.0.8/proscenium/verbs/extract.py +0 -65
  21. proscenium-0.0.8/proscenium/verbs/read.py +0 -53
  22. proscenium-0.0.8/proscenium/verbs/vector_database.py +0 -139
  23. proscenium-0.0.8/proscenium/verbs/write.py +0 -14
  24. proscenium-0.0.8/pyproject.toml +0 -37
  25. {proscenium-0.0.8 → proscenium-0.0.11}/LICENSE +0 -0
  26. {proscenium-0.0.8 → proscenium-0.0.11}/README.md +0 -0
  27. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/__init__.py +0 -0
  28. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/admin/__init__.py +0 -0
  29. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/bin/__init__.py +0 -0
  30. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/bin/bot.py +0 -0
  31. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/core/__init__.py +0 -0
  32. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/interfaces/__init__.py +0 -0
  33. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/interfaces/slack.py +0 -0
  34. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/patterns/__init__.py +0 -0
  35. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/patterns/graph_rag.py +0 -0
  36. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/patterns/tools.py +0 -0
  37. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/util/__init__.py +0 -0
  38. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/verbs/__init__.py +0 -0
  39. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/verbs/complete.py +0 -0
  40. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/verbs/display/__init__.py +0 -0
  41. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/verbs/display/chat.py +0 -0
  42. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/verbs/display/tools.py +0 -0
  43. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/verbs/display.py +0 -0
  44. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/verbs/invoke.py +0 -0
  45. {proscenium-0.0.8 → proscenium-0.0.11/src}/proscenium/verbs/remember.py +0 -0
@@ -1,34 +1,27 @@
- Metadata-Version: 2.3
+ Metadata-Version: 2.4
  Name: proscenium
- Version: 0.0.8
+ Version: 0.0.11
  Summary: Frame AI Agents
- License: ASFv2
- Author: Adam Pingel
- Author-email: oss@pingel.org
- Requires-Python: >=3.11,<4.0
- Classifier: License :: Other/Proprietary License
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: 3.13
- Provides-Extra: testing
- Requires-Dist: aisuite[openai,anthropic] (>=0.1.10,<0.2.0)
- Requires-Dist: datasets (>=3.3.2,<4.0.0)
- Requires-Dist: docstring_parser (>=0.16,<0.17)
- Requires-Dist: gofannon (>=0.25.13,<0.26.0)
- Requires-Dist: langchain-community (>=0.3.18,<0.4.0)
- Requires-Dist: langchain-huggingface (>=0.1.2,<0.2.0)
- Requires-Dist: neo4j (>=5.28.1,<6.0.0)
- Requires-Dist: pydantic (>=2.10.6,<3.0.0)
- Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
- Requires-Dist: pymilvus_model (>=0.3.1,<0.4.0)
- Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
- Requires-Dist: rich (>=13.9.4,<14.0.0)
- Requires-Dist: slack_sdk (>=3.35.0,<4.0.0)
- Requires-Dist: stringcase (>=1.2.0,<2.0.0)
- Requires-Dist: tiktoken (>=0.9.0,<0.10.0)
- Requires-Dist: typer (>=0.15.2,<0.16.0)
+ Author-email: Adam Pingel <oss@pingel.org>
+ License-Expression: Apache-2.0
+ Requires-Python: >=3.11
  Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: aisuite>=0.1.10
+ Requires-Dist: docstring_parser>=0.16
+ Requires-Dist: pydantic>=2.10.6
+ Requires-Dist: rich>=13.9.4
+ Requires-Dist: slack_sdk>=3.35.0
+ Provides-Extra: test
+ Requires-Dist: pytest<9,>=8; extra == "test"
+ Requires-Dist: pytest-cov<6,>=5; extra == "test"
+ Requires-Dist: typer>=0.15.2; extra == "test"
+ Requires-Dist: click>=8.2.0; extra == "test"
+ Requires-Dist: pytest>=8.3.5; extra == "test"
+ Requires-Dist: neo4j>=5.28.1; extra == "test"
+ Requires-Dist: gofannon>=0.25.13; extra == "test"
+ Requires-Dist: lapidarist>=0.0.2; extra == "test"
+ Dynamic: license-file

  # Proscenium

@@ -45,4 +38,3 @@ Currently, proscenium development prioritizes support for domains where the crea
  See the [website](https://the-ai-alliance.github.io/proscenium/) for quickstart info, goals, and other links.

  To find the Proscenium community, see the [discussions](https://github.com/The-AI-Alliance/proscenium/discussions)
-
@@ -0,0 +1,38 @@
+ [build-system]
+ requires = [
+     "setuptools>=68",
+     "wheel"
+ ]
+ build-backend = "setuptools.build_meta"
+
+ [project]
+ name = "proscenium"
+ version = "0.0.11"
+ description = "Frame AI Agents"
+ authors = [{ name = "Adam Pingel", email = "oss@pingel.org" }]
+ license = "Apache-2.0"
+ readme = "README.md"
+ requires-python = ">=3.11"
+
+ dependencies = [
+     "aisuite>=0.1.10",
+     "docstring_parser>=0.16",
+     "pydantic>=2.10.6",
+     "rich>=13.9.4",
+     "slack_sdk>=3.35.0"
+ ]
+
+ [project.optional-dependencies]
+ test = [
+     "pytest>=8,<9",
+     "pytest-cov>=5,<6",
+     "typer>=0.15.2",
+     "click>=8.2.0",
+     "pytest>=8.3.5",
+     "neo4j>=5.28.1",
+     "gofannon>=0.25.13",
+     "lapidarist>=0.0.2"
+ ]
+
+ [project.scripts]
+ proscenium-bot = "proscenium.bin.bot:app"
@@ -0,0 +1,4 @@
+ [egg_info]
+ tag_build =
+ tag_date = 0
+
@@ -1,12 +1,12 @@
  from typing import List, Dict
+
  import logging
+ from rich.table import Table

  from pymilvus import MilvusClient
  from pymilvus import model

  from proscenium.verbs.complete import complete_simple
- from proscenium.verbs.display.milvus import chunk_hits_table
- from proscenium.verbs.vector_database import closest_chunks

  log = logging.getLogger(__name__)

@@ -37,6 +37,41 @@ def rag_prompt(chunks: List[Dict], query: str) -> str:
      return rag_prompt_template.format(context=context, query=query)


+ def closest_chunks(
+     client: MilvusClient,
+     embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
+     query: str,
+     collection_name: str,
+     k: int = 4,
+ ) -> List[Dict]:
+
+     client.load_collection(collection_name)
+
+     result = client.search(
+         collection_name=collection_name,
+         data=embedding_fn.encode_queries([query]),
+         anns_field="vector",
+         search_params={"metric": "IP", "offset": 0},
+         output_fields=["text"],
+         limit=k,
+     )
+
+     hits = result[0]
+
+     return hits
+
+
+ def chunk_hits_table(chunks: list[dict]) -> Table:
+
+     table = Table(title="Closest Chunks", show_lines=True)
+     table.add_column("id", justify="right")
+     table.add_column("distance")
+     table.add_column("entity.text", justify="right")
+     for chunk in chunks:
+         table.add_row(str(chunk["id"]), str(chunk["distance"]), chunk["entity"]["text"])
+     return table
+
+
  def answer_question(
      query: str,
      model_id: str,
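
The two functions added above were previously imported from proscenium.verbs.vector_database and proscenium.verbs.display.milvus, both of which are deleted later in this diff; rag.py now inlines them. A minimal usage sketch, assuming a local Milvus Lite file, a collection named "chunks", and the all-MiniLM-L6-v2 embedding model (all illustrative values, not taken from the package; note that pymilvus is no longer a declared dependency of 0.0.11, so it must be installed separately):

    # Sketch only: exercise the functions now defined in proscenium/patterns/rag.py.
    # The URI, collection name, and embedding model id below are hypothetical.
    from pymilvus import MilvusClient, model
    from rich.console import Console

    from proscenium.patterns.rag import chunk_hits_table, closest_chunks, rag_prompt

    client = MilvusClient(uri="milvus_demo.db")
    embedding_fn = model.dense.SentenceTransformerEmbeddingFunction(
        model_name="all-MiniLM-L6-v2", device="cpu"
    )

    query = "What is Proscenium?"
    hits = closest_chunks(client, embedding_fn, query, "chunks", k=4)

    Console().print(chunk_hits_table(hits))  # id / distance / entity.text table
    prompt = rag_prompt(hits, query)  # prompt string, ready for complete_simple
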
@@ -0,0 +1,40 @@
+ Metadata-Version: 2.4
+ Name: proscenium
+ Version: 0.0.11
+ Summary: Frame AI Agents
+ Author-email: Adam Pingel <oss@pingel.org>
+ License-Expression: Apache-2.0
+ Requires-Python: >=3.11
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: aisuite>=0.1.10
+ Requires-Dist: docstring_parser>=0.16
+ Requires-Dist: pydantic>=2.10.6
+ Requires-Dist: rich>=13.9.4
+ Requires-Dist: slack_sdk>=3.35.0
+ Provides-Extra: test
+ Requires-Dist: pytest<9,>=8; extra == "test"
+ Requires-Dist: pytest-cov<6,>=5; extra == "test"
+ Requires-Dist: typer>=0.15.2; extra == "test"
+ Requires-Dist: click>=8.2.0; extra == "test"
+ Requires-Dist: pytest>=8.3.5; extra == "test"
+ Requires-Dist: neo4j>=5.28.1; extra == "test"
+ Requires-Dist: gofannon>=0.25.13; extra == "test"
+ Requires-Dist: lapidarist>=0.0.2; extra == "test"
+ Dynamic: license-file
+
+ # Proscenium
+
+ [![CI](https://github.com/The-AI-Alliance/proscenium/actions/workflows/pytest.yml/badge.svg)](https://github.com/The-AI-Alliance/proscenium/actions/workflows/pytest.yml)
+ [![PyPI](https://img.shields.io/pypi/v/proscenium)](https://pypi.org/project/proscenium/)
+ [![License](https://img.shields.io/github/license/The-AI-Alliance/proscenium)](https://github.com/The-AI-Alliance/proscenium/tree/main?tab=Apache-2.0-1-ov-file#readme)
+ [![Issues](https://img.shields.io/github/issues/The-AI-Alliance/proscenium)](https://github.com/The-AI-Alliance/proscenium/issues)
+ [![GitHub stars](https://img.shields.io/github/stars/The-AI-Alliance/proscenium?style=social)](https://github.com/The-AI-Alliance/proscenium/stargazers)
+
+ Proscenium is a small, experimental library of composable glue that allows for succinct construction of enterprise AI applications. It was started in February 2025 and is still in early development.
+
+ Currently, proscenium development prioritizes support for domains where the creation and use of structured data is critical.
+
+ See the [website](https://the-ai-alliance.github.io/proscenium/) for quickstart info, goals, and other links.
+
+ To find the Proscenium community, see the [discussions](https://github.com/The-AI-Alliance/proscenium/discussions)
@@ -0,0 +1,31 @@
+ LICENSE
+ README.md
+ pyproject.toml
+ src/proscenium/__init__.py
+ src/proscenium.egg-info/PKG-INFO
+ src/proscenium.egg-info/SOURCES.txt
+ src/proscenium.egg-info/dependency_links.txt
+ src/proscenium.egg-info/entry_points.txt
+ src/proscenium.egg-info/requires.txt
+ src/proscenium.egg-info/top_level.txt
+ src/proscenium/admin/__init__.py
+ src/proscenium/bin/__init__.py
+ src/proscenium/bin/bot.py
+ src/proscenium/core/__init__.py
+ src/proscenium/interfaces/__init__.py
+ src/proscenium/interfaces/slack.py
+ src/proscenium/patterns/__init__.py
+ src/proscenium/patterns/graph_rag.py
+ src/proscenium/patterns/rag.py
+ src/proscenium/patterns/tools.py
+ src/proscenium/util/__init__.py
+ src/proscenium/verbs/__init__.py
+ src/proscenium/verbs/complete.py
+ src/proscenium/verbs/display.py
+ src/proscenium/verbs/invoke.py
+ src/proscenium/verbs/remember.py
+ src/proscenium/verbs/display/__init__.py
+ src/proscenium/verbs/display/chat.py
+ src/proscenium/verbs/display/tools.py
+ tests/test_demo_typer_help.py
+ tests/test_display.py
@@ -0,0 +1,2 @@
+ [console_scripts]
+ proscenium-bot = proscenium.bin.bot:app
@@ -0,0 +1,15 @@
+ aisuite>=0.1.10
+ docstring_parser>=0.16
+ pydantic>=2.10.6
+ rich>=13.9.4
+ slack_sdk>=3.35.0
+
+ [test]
+ pytest<9,>=8
+ pytest-cov<6,>=5
+ typer>=0.15.2
+ click>=8.2.0
+ pytest>=8.3.5
+ neo4j>=5.28.1
+ gofannon>=0.25.13
+ lapidarist>=0.0.2
@@ -0,0 +1 @@
+ proscenium
@@ -0,0 +1,32 @@
+ import pytest
+
+ from click import Command
+ from typer.main import get_command
+
+ from demo.cli import app
+
+
+ def _check_help_on_command(command: Command):
+     """
+     Check that this command (and any subcommands) has a non-empty help string.
+     """
+
+     assert (
+         command.help is not None and command.help.strip() != ""
+     ), f"Command '{command.name}' is missing a help string"
+
+     # recurse into subcommands
+     if hasattr(command, "commands"):
+         for subcommand_name, subcommand in command.commands.items():
+             _check_help_on_command(subcommand)
+
+
+ @pytest.mark.parametrize("command_name", list(get_command(app).commands.keys()))
+ def test_command_has_help(command_name: str):
+     """
+     Test each top-level command in the demo typer app to ensure
+     it (and any subcommands) have help strings.
+     """
+     root_command: Command = get_command(app)
+     command = root_command.commands[command_name]
+     _check_help_on_command(command)
@@ -0,0 +1,7 @@
+ from rich import print
+ from proscenium.verbs.display import header
+
+
+ def test_header():
+     print(header())
+     assert True, "Printed header"
@@ -1,51 +0,0 @@
- from typing import Optional
- import logging
- from rich.console import Console
- from pymilvus import model
-
- from proscenium.verbs.read import load_file
- from proscenium.verbs.chunk import documents_to_chunks_by_characters
- from proscenium.verbs.display.milvus import collection_panel
- from proscenium.verbs.vector_database import vector_db
- from proscenium.verbs.vector_database import create_collection
- from proscenium.verbs.vector_database import add_chunks_to_vector_db
-
- log = logging.getLogger(__name__)
-
-
- def load_chunks_from_files(
-     data_files: list[str],
-     milvus_uri: str,
-     embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-     collection_name: str,
-     console: Optional[Console] = None,
- ) -> None:
-
-     vector_db_client = vector_db(milvus_uri)
-     log.info("Vector db stored at %s", milvus_uri)
-
-     for data_file in data_files:
-
-         log.info(
-             "Loading data file %s into vector db %s collection %s",
-             data_file,
-             milvus_uri,
-             collection_name,
-         )
-         create_collection(vector_db_client, embedding_fn, collection_name)
-
-         documents = load_file(data_file)
-         chunks = documents_to_chunks_by_characters(documents)
-         log.info("Data file %s has %s chunks", data_file, len(chunks))
-
-         info = add_chunks_to_vector_db(
-             vector_db_client,
-             embedding_fn,
-             chunks,
-             collection_name,
-         )
-         log.info("%s chunks inserted ", info["insert_count"])
-         if console is not None:
-             console.print(collection_panel(vector_db_client, collection_name))
-
-     vector_db_client.close()
@@ -1,84 +0,0 @@
- from typing import List
- from typing import Callable
- from typing import Optional
-
- import time
- import logging
- from pydantic import BaseModel
-
- from rich.panel import Panel
- from rich.console import Console
- from rich.progress import Progress
-
- from langchain_core.documents.base import Document
-
- from proscenium.verbs.chunk import documents_to_chunks_by_tokens
- from proscenium.verbs.extract import extract_to_pydantic_model
-
- log = logging.getLogger(__name__)
-
-
- def extract_from_document_chunks(
-     doc: Document,
-     doc_as_rich: Callable[[Document], Panel],
-     chunk_extraction_model_id: str,
-     chunk_extraction_template: str,
-     chunk_extract_clazz: type[BaseModel],
-     delay: float,
-     console: Optional[Console] = None,
- ) -> List[BaseModel]:
-
-     if console is not None:
-         console.print(doc_as_rich(doc))
-         console.print()
-
-     extract_models = []
-
-     chunks = documents_to_chunks_by_tokens([doc], chunk_size=1000, chunk_overlap=0)
-     for i, chunk in enumerate(chunks):
-
-         ce = extract_to_pydantic_model(
-             chunk_extraction_model_id,
-             chunk_extraction_template,
-             chunk_extract_clazz,
-             chunk.page_content,
-         )
-
-         log.info("Extract model in chunk %s of %s", i + 1, len(chunks))
-         if console is not None:
-             console.print(Panel(str(ce)))
-
-         extract_models.append(ce)
-         time.sleep(delay)
-
-     return extract_models
-
-
- def enrich_documents(
-     retrieve_documents: Callable[[], List[Document]],
-     extract_from_doc_chunks: Callable[[Document], List[BaseModel]],
-     doc_enrichments: Callable[[Document, list[BaseModel]], BaseModel],
-     enrichments_jsonl_file: str,
-     console: Optional[Console] = None,
- ) -> None:
-
-     docs = retrieve_documents()
-
-     with Progress() as progress:
-
-         task_enrich = progress.add_task(
-             "[green]Enriching documents...", total=len(docs)
-         )
-
-         with open(enrichments_jsonl_file, "wt") as f:
-
-             for doc in docs:
-
-                 chunk_extract_models = extract_from_doc_chunks(doc)
-                 enrichments = doc_enrichments(doc, chunk_extract_models)
-                 enrichments_json = enrichments.model_dump_json()
-                 f.write(enrichments_json + "\n")
-
-                 progress.update(task_enrich, advance=1)
-
-     log.info("Wrote document enrichments to %s", enrichments_jsonl_file)
@@ -1,95 +0,0 @@
- from typing import Optional
- import logging
-
- from rich.console import Console
- from langchain_core.documents.base import Document
- from neo4j import Driver
-
- from pymilvus import MilvusClient
-
- from proscenium.verbs.vector_database import vector_db
- from proscenium.verbs.vector_database import create_collection
- from proscenium.verbs.vector_database import closest_chunks
- from proscenium.verbs.vector_database import add_chunks_to_vector_db
- from proscenium.verbs.vector_database import embedding_function
- from proscenium.verbs.display.milvus import collection_panel
-
- log = logging.getLogger(__name__)
-
-
- class Resolver:
-
-     def __init__(
-         self,
-         cypher: str,
-         field_name: str,
-         collection_name: str,
-     ):
-         self.cypher = cypher
-         self.field_name = field_name
-         self.collection_name = collection_name
-
-
- def load_entity_resolver(
-     driver: Driver,
-     resolvers: list[Resolver],
-     embedding_model_id: str,
-     milvus_uri: str,
-     console: Optional[Console] = None,
- ) -> None:
-
-     vector_db_client = vector_db(milvus_uri)
-     log.info("Vector db stored at %s", milvus_uri)
-
-     embedding_fn = embedding_function(embedding_model_id)
-     log.info("Embedding model %s", embedding_model_id)
-
-     for resolver in resolvers:
-
-         values = []
-         with driver.session() as session:
-             result = session.run(resolver.cypher)
-             new_values = [Document(record[resolver.field_name]) for record in result]
-             values.extend(new_values)
-
-         log.info("Loading entity resolver into vector db %s", resolver.collection_name)
-         create_collection(vector_db_client, embedding_fn, resolver.collection_name)
-
-         info = add_chunks_to_vector_db(
-             vector_db_client, embedding_fn, values, resolver.collection_name
-         )
-         log.info("%s chunks inserted ", info["insert_count"])
-
-         if console is not None:
-             console.print(collection_panel(vector_db_client, resolver.collection_name))
-
-     vector_db_client.close()
-
-
- def find_matching_objects(
-     vector_db_client: MilvusClient,
-     approximate: str,
-     resolver: Resolver,
- ) -> Optional[str]:
-
-     log.info("Loading collection", resolver.collection_name)
-     vector_db_client.load_collection(resolver.collection_name)
-
-     log.info(
-         "Finding entity matches for", approximate, "using", resolver.collection_name
-     )
-
-     hits = closest_chunks(
-         vector_db_client,
-         resolver.embedding_fn,
-         approximate,
-         resolver.collection_name,
-         k=5,
-     )
-     # TODO apply distance threshold
-     for match in [head["entity"]["text"] for head in hits[:1]]:
-         log.info("Closest match:", match)
-         return match
-
-     log.info("No match found")
-     return None
@@ -1,41 +0,0 @@
- from typing import Callable
- from typing import Any
-
- import logging
- import json
- from pydantic import BaseModel
-
- from rich.progress import Progress
-
- from neo4j import Driver
-
- log = logging.getLogger(__name__)
-
-
- def load_knowledge_graph(
-     driver: Driver,
-     enrichments_jsonl_file: str,
-     enrichments_clazz: type[BaseModel],
-     doc_enrichments_to_graph: Callable[[Any, BaseModel], None],
- ) -> None:
-
-     log.info("Parsing enrichments from %s", enrichments_jsonl_file)
-
-     enrichmentss = []
-     with open(enrichments_jsonl_file, "r") as f:
-         for line in f:
-             e = enrichments_clazz.model_construct(**json.loads(line))
-             enrichmentss.append(e)
-
-     with Progress() as progress:
-
-         task_load = progress.add_task(
-             f"Loading {len(enrichmentss)} enriched documents into graph...",
-             total=len(enrichmentss),
-         )
-
-         with driver.session() as session:
-             session.run("MATCH (n) DETACH DELETE n")  # empty graph
-             for e in enrichmentss:
-                 session.execute_write(doc_enrichments_to_graph, e)
-                 progress.update(task_load, advance=1)
@@ -1,42 +0,0 @@
- import logging
- import os
- from typing import List
- from typing import Iterable
-
- from langchain_core.documents.base import Document
-
- from langchain.text_splitter import CharacterTextSplitter
- from langchain.text_splitter import TokenTextSplitter
-
- log = logging.getLogger(__name__)
-
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
- logging.getLogger("langchain_text_splitters.base").setLevel(logging.ERROR)
-
- # Each text chunk inherits the metadata from the document.
-
-
- def documents_to_chunks_by_characters(
-     documents: Iterable[Document], chunk_size: int = 1000, chunk_overlap: int = 0
- ) -> List[Document]:
-
-     text_splitter = CharacterTextSplitter(
-         chunk_size=chunk_size, chunk_overlap=chunk_overlap
-     )
-
-     chunks = text_splitter.split_documents(documents)
-
-     return chunks
-
-
- def documents_to_chunks_by_tokens(
-     documents: Iterable[Document], chunk_size: int = 1000, chunk_overlap: int = 0
- ) -> List[Document]:
-
-     text_splitter = TokenTextSplitter(
-         chunk_size=chunk_size, chunk_overlap=chunk_overlap
-     )
-
-     chunks = text_splitter.split_documents(documents)
-
-     return chunks
@@ -1,68 +0,0 @@
- from rich.table import Table
- from rich.panel import Panel
- from rich.text import Text
- from rich.console import Group
- from pymilvus import MilvusClient
-
-
- def chunk_hits_table(chunks: list[dict]) -> Table:
-
-     table = Table(title="Closest Chunks", show_lines=True)
-     table.add_column("id", justify="right")
-     table.add_column("distance")
-     table.add_column("entity.text", justify="right")
-     for chunk in chunks:
-         table.add_row(str(chunk["id"]), str(chunk["distance"]), chunk["entity"]["text"])
-     return table
-
-
- def collection_panel(client: MilvusClient, collection_name: str) -> Panel:
-
-     stats = client.get_collection_stats(collection_name)
-     desc = client.describe_collection(collection_name)
-
-     params_text = Text(
-         f"""
- Collection Name: {desc['collection_name']}
- Auto ID: {desc['auto_id']}
- Num Shards: {desc['num_shards']}
- Description: {desc['description']}
- Functions: {desc['functions']}
- Aliases: {desc['aliases']}
- Collection ID: {desc['collection_id']}
- Consistency Level: {desc['consistency_level']}
- Properties: {desc['properties']}
- Num Partitions: {desc['num_partitions']}
- Enable Dynamic Field: {desc['enable_dynamic_field']}"""
-     )
-
-     params_panel = Panel(params_text, title="Params")
-
-     fields_table = Table(title="Fields", show_lines=True)
-     fields_table.add_column("id", justify="left")
-     fields_table.add_column("name", justify="left")
-     fields_table.add_column("description", justify="left")
-     fields_table.add_column("type", justify="left")
-     fields_table.add_column("params", justify="left")
-     fields_table.add_column("auto_id", justify="left")
-     fields_table.add_column("is_primary", justify="left")
-     for field in desc["fields"]:
-         fields_table.add_row(
-             str(field["field_id"]),  # int
-             field["name"],
-             field["description"],
-             field["type"].name,  # Milvus DataType
-             "\n".join([f"{k}: {v}" for k, v in field["params"].items()]),
-             str(field.get("auto_id", "-")),  # bool
-             str(field.get("is_primary", "-")),
-         )  # bool
-
-     stats_text = Text("\n".join([f"{k}: {v}" for k, v in stats.items()]))
-     stats_panel = Panel(stats_text, title="Stats")
-
-     panel = Panel(
-         Group(params_panel, fields_table, stats_panel),
-         title=f"Collection {collection_name}",
-     )
-
-     return panel
@@ -1,25 +0,0 @@
- from typing import List
- from rich.table import Table
-
-
- def triples_table(triples: List[tuple[str, str, str]], title: str) -> Table:
-
-     table = Table(title=title, show_lines=False)
-     table.add_column("Subject", justify="left")
-     table.add_column("Predicate", justify="left")
-     table.add_column("Object", justify="left")
-     for triple in triples:
-         table.add_row(*triple)
-
-     return table
-
-
- def pairs_table(subject_predicate_pairs: List[tuple[str, str]], title: str) -> Table:
-
-     table = Table(title=title, show_lines=False)
-     table.add_column("Subject", justify="left")
-     table.add_column("Predicate", justify="left")
-     for pair in subject_predicate_pairs:
-         table.add_row(*pair)
-
-     return table
@@ -1,65 +0,0 @@
- from typing import Optional
- import logging
- from rich.console import Console
- from string import Formatter
-
- import json
- from pydantic import BaseModel
-
- from proscenium.verbs.complete import complete_simple
-
- log = logging.getLogger(__name__)
-
- extraction_system_prompt = "You are an entity extractor"
-
-
- class PartialFormatter(Formatter):
-     def get_value(self, key, args, kwargs):
-         try:
-             return super().get_value(key, args, kwargs)
-         except KeyError:
-             return "{" + key + "}"
-
-
- partial_formatter = PartialFormatter()
-
- raw_extraction_template = """\
- Below is a description of a data class for storing information extracted from text:
-
- {extraction_description}
-
- Find the information in the following text, and provide them in the specified JSON response format.
- Only answer in JSON.:
-
- {text}
- """
-
-
- def extract_to_pydantic_model(
-     extraction_model_id: str,
-     extraction_template: str,
-     clazz: type[BaseModel],
-     text: str,
-     console: Optional[Console] = None,
- ) -> BaseModel:
-
-     extract_str = complete_simple(
-         extraction_model_id,
-         extraction_system_prompt,
-         extraction_template.format(text=text),
-         response_format={
-             "type": "json_object",
-             "schema": clazz.model_json_schema(),
-         },
-         console=console,
-     )
-
-     log.info("complete_to_pydantic_model: extract_str = <<<%s>>>", extract_str)
-
-     try:
-         extract_dict = json.loads(extract_str)
-         return clazz.model_construct(**extract_dict)
-     except Exception as e:
-         log.error("complete_to_pydantic_model: Exception: %s", e)
-
-     return None
@@ -1,53 +0,0 @@
- from typing import List
-
- import os
- import logging
-
- import httpx
- from pydantic.networks import HttpUrl
- from pathlib import Path
-
- from langchain_core.documents.base import Document
- from langchain_community.document_loaders import TextLoader
- from langchain_community.document_loaders.hugging_face_dataset import (
-     HuggingFaceDatasetLoader,
- )
-
- log = logging.getLogger(__name__)
-
- os.environ["TOKENIZERS_PARALLELISM"] = "false"
- logging.getLogger("langchain_text_splitters.base").setLevel(logging.ERROR)
-
-
- def load_file(filename: str) -> List[Document]:
-
-     loader = TextLoader(filename)
-     documents = loader.load()
-
-     return documents
-
-
- def load_hugging_face_dataset(
-     dataset_name: str, page_content_column: str = "text"
- ) -> List[Document]:
-
-     loader = HuggingFaceDatasetLoader(
-         dataset_name, page_content_column=page_content_column
-     )
-     documents = loader.load()
-
-     return documents
-
-
- async def url_to_file(url: HttpUrl, data_file: Path, overwrite: bool = False):
-
-     if data_file.exists() and not overwrite:
-         return
-
-     async with httpx.AsyncClient() as client:
-
-         response = await client.get(url)
-         response.raise_for_status()
-
-         with open(data_file, "wb") as file:
-             file.write(response.content)
@@ -1,139 +0,0 @@
- from typing import Dict, List
-
- import logging
- from pathlib import Path
- from langchain_core.documents.base import Document
- from urllib.parse import urlsplit
- from pymilvus import MilvusClient
- from pymilvus import DataType, FieldSchema, CollectionSchema
- from pymilvus import model
-
- # See https://milvus.io/docs/quickstart.md
-
- log = logging.getLogger(__name__)
-
-
- def embedding_function(
-     embedding_model_id: str,
- ) -> model.dense.SentenceTransformerEmbeddingFunction:
-     embedding_fn = model.dense.SentenceTransformerEmbeddingFunction(
-         model_name=embedding_model_id, device="cpu"  # or 'cuda:0'
-     )
-     return embedding_fn
-
-
- def schema_chunks(
-     embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
- ) -> CollectionSchema:
-
-     field_id = FieldSchema(
-         name="id", dtype=DataType.INT64, is_primary=True, auto_id=True
-     )
-     field_text = FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=50000)
-     field_vector = FieldSchema(
-         name="vector", dtype=DataType.FLOAT_VECTOR, dim=embedding_fn.dim
-     )
-
-     schema = CollectionSchema(
-         fields=[field_id, field_text, field_vector],
-         description="Chunks Schema",
-         enable_dynamic_field=True,
-     )
-
-     return schema
-
-
- def vector_db(
-     uri: str,
- ) -> MilvusClient:
-
-     log.info("Connecting to vector db %s", uri)
-     uri_fields = urlsplit(uri)
-     client = None
-     if uri_fields[0] == "file":
-         file_path = Path(uri_fields[2][1:])
-         if file_path.exists():
-             log.info(
-                 "Using existing %s file.",
-                 uri_fields[2],
-             )
-         else:
-             log.info("Creating new vector db file %s", file_path)
-
-         client = MilvusClient(uri=str(file_path))
-
-     else:
-
-         log.info("Connecting to vector db at non-file uri %s", uri)
-         client = MilvusClient(uri=uri)
-
-     return client
-
-
- def create_collection(
-     client: MilvusClient,
-     embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-     collection_name: str,
- ) -> None:
-
-     client.create_collection(
-         collection_name=collection_name,
-         schema=schema_chunks(embedding_fn),
-     )
-
-     index_params = client.prepare_index_params()
-
-     index_params.add_index(
-         field_name="vector",
-         index_type="IVF_FLAT",
-         metric_type="IP",
-         params={"nlist": 1024},
-     )
-
-     client.create_index(
-         collection_name=collection_name, index_params=index_params, sync=True
-     )
-     log.info("Created collection %s", collection_name)
-
-
- def add_chunks_to_vector_db(
-     client: MilvusClient,
-     embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-     chunks: List[Document],
-     collection_name: str,
- ) -> Dict:
-
-     vectors = embedding_fn.encode_documents([chunk.page_content for chunk in chunks])
-
-     data = [
-         {"text": chunk.page_content, "vector": vector}
-         for chunk, vector in zip(chunks, vectors)
-     ]
-
-     insert_result = client.insert(collection_name, data)
-
-     return insert_result
-
-
- def closest_chunks(
-     client: MilvusClient,
-     embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-     query: str,
-     collection_name: str,
-     k: int = 4,
- ) -> List[Dict]:
-
-     client.load_collection(collection_name)
-
-     result = client.search(
-         collection_name=collection_name,
-         data=embedding_fn.encode_queries([query]),
-         anns_field="vector",
-         search_params={"metric": "IP", "offset": 0},
-         output_fields=["text"],
-         limit=k,
-     )
-
-     hits = result[0]
-
-     return hits
@@ -1,14 +0,0 @@
- from typing import List
-
- import logging
- import csv
-
- log = logging.getLogger(__name__)
-
-
- def triples_to_csv(triples: List[tuple[str, str, str]], filename: str) -> None:
-
-     with open(filename, "wt") as f:
-         writer = csv.writer(f, delimiter=",", quotechar='"')
-         writer.writerow(["entity", "role", "case name"])  # header
-         writer.writerows(triples)
@@ -1,37 +0,0 @@
- [tool.poetry]
- name = "proscenium"
- version = "0.0.8"
- description = "Frame AI Agents"
- authors = ["Adam Pingel <oss@pingel.org>"]
- license = "ASFv2"
- readme = "README.md"
- packages = [{include = "proscenium"}]
-
- [tool.poetry.dependencies]
- python = "^3.11"
- aisuite = {extras = ["openai,anthropic"], version = "^0.1.10"}
- rich = "^13.9.4"
- typer = "^0.15.2"
- python-dotenv = "^1.0.1"
- pydantic = "^2.10.6"
- stringcase = "^1.2.0"
- docstring_parser = "^0.16"
- pymilvus = {version = "^2.5.4"}
- pymilvus_model = {version = "^0.3.1"}
- datasets = "^3.3.2"
- tiktoken = "^0.9.0"
- neo4j = "^5.28.1"
- gofannon = "^0.25.13"
- langchain-community = "^0.3.18"
- langchain-huggingface = "^0.1.2"
- slack_sdk = "^3.35.0"
-
- [tool.poetry.extras]
- testing = ["pytest"]
-
- [tool.poetry.scripts]
- proscenium-bot = "proscenium.bin.bot:app"
-
- [build-system]
- requires = ["poetry-core"]
- build-backend = "poetry.core.masonry.api"