proscenium 0.0.2__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proscenium/__init__.py +3 -0
- proscenium/admin/__init__.py +37 -0
- proscenium/bin/bot.py +142 -0
- proscenium/core/__init__.py +152 -0
- proscenium/interfaces/__init__.py +3 -0
- proscenium/interfaces/slack.py +265 -0
- proscenium/patterns/__init__.py +3 -0
- proscenium/patterns/chunk_space.py +51 -0
- proscenium/{scripts → patterns}/document_enricher.py +4 -2
- proscenium/{scripts → patterns}/entity_resolver.py +16 -16
- proscenium/{scripts → patterns}/graph_rag.py +12 -7
- proscenium/{scripts → patterns}/knowledge_graph.py +3 -1
- proscenium/{scripts → patterns}/rag.py +6 -8
- proscenium/{scripts → patterns}/tools.py +3 -1
- proscenium/verbs/__init__.py +3 -0
- proscenium/verbs/chunk.py +2 -0
- proscenium/verbs/complete.py +6 -4
- proscenium/verbs/display.py +3 -0
- proscenium/verbs/extract.py +4 -2
- proscenium/verbs/invoke.py +3 -0
- proscenium/verbs/read.py +6 -6
- proscenium/verbs/remember.py +5 -0
- proscenium/verbs/vector_database.py +11 -18
- proscenium/verbs/write.py +3 -0
- {proscenium-0.0.2.dist-info → proscenium-0.0.3.dist-info}/METADATA +2 -1
- proscenium-0.0.3.dist-info/RECORD +34 -0
- {proscenium-0.0.2.dist-info → proscenium-0.0.3.dist-info}/WHEEL +1 -1
- proscenium-0.0.3.dist-info/entry_points.txt +3 -0
- proscenium/scripts/__init__.py +0 -0
- proscenium/scripts/chunk_space.py +0 -39
- proscenium/verbs/display/huggingface.py +0 -0
- proscenium-0.0.2.dist-info/RECORD +0 -29
- {proscenium-0.0.2.dist-info → proscenium-0.0.3.dist-info}/LICENSE +0 -0
proscenium/__init__.py
CHANGED
@@ -0,0 +1,37 @@
|
|
1
|
+
from typing import Generator
|
2
|
+
from typing import List
|
3
|
+
from typing import Optional
|
4
|
+
|
5
|
+
import logging
|
6
|
+
|
7
|
+
from proscenium.core import Prop
|
8
|
+
from proscenium.core import Character
|
9
|
+
from rich.console import Console
|
10
|
+
|
11
|
+
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
12
|
+
|
13
|
+
log = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
system_message = """
|
16
|
+
You are an administrator of a chatbot.
|
17
|
+
"""
|
18
|
+
|
19
|
+
|
20
|
+
def props(console: Optional[Console]) -> List[Prop]:
    """Return the props needed by the admin character.

    The admin character uses no props, so the result is always a fresh empty
    list; ``console`` is accepted but not used.
    """
    no_props: List[Prop] = []
    return no_props
|
23
|
+
|
24
|
+
|
25
|
+
class Admin(Character):
    # Character that answers in the admin channel.
    #
    # NOTE: intentionally no class docstring. `Character.description()` returns
    # `__doc__`, which is embedded in the curtain-up message, so adding a
    # docstring here would change that user-visible text.

    def __init__(self, admin_channel_id: str):
        super().__init__(admin_channel_id)
        # Kept explicit even though the base class stores the same value.
        self.admin_channel_id = admin_channel_id

    def handle(
        self,
        channel_id: str,
        speaker_id: str,
        question: str,
    ) -> Generator[tuple[str, str], None, None]:
        """Reply to any utterance with a fixed self-introduction.

        `self` was missing from the original signature, so calling
        `admin.handle(channel_id, speaker_id, text)` on an instance raised
        TypeError (four positional arguments passed to a three-parameter
        function).

        Yields:
            A single `(channel_id, text)` pair addressed to the channel the
            question came from. `speaker_id` and `question` are not inspected.
        """
        yield channel_id, "I am the administrator of this chat system."
|
proscenium/bin/bot.py
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
import os
|
4
|
+
import sys
|
5
|
+
import logging
|
6
|
+
import typer
|
7
|
+
import importlib
|
8
|
+
from rich.console import Console
|
9
|
+
|
10
|
+
from proscenium.admin import Admin
|
11
|
+
|
12
|
+
from proscenium.interfaces.slack import (
|
13
|
+
get_slack_auth,
|
14
|
+
channel_table,
|
15
|
+
bot_user_id,
|
16
|
+
places_table,
|
17
|
+
channel_maps,
|
18
|
+
make_slack_listener,
|
19
|
+
connect,
|
20
|
+
send_curtain_up,
|
21
|
+
listen,
|
22
|
+
send_curtain_down,
|
23
|
+
shutdown,
|
24
|
+
)
|
25
|
+
|
26
|
+
from proscenium.verbs.display import header
|
27
|
+
|
28
|
+
# Configure the root logger once: WARNING and above go to stdout.
# The original repeated this exact call a second time; basicConfig does nothing
# when the root logger already has a handler, so the duplicate was dead code.
logging.basicConfig(
    stream=sys.stdout,
    format="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
    level=logging.WARNING,
)
|
39
|
+
|
40
|
+
app = typer.Typer(help="Proscenium Bot")
|
41
|
+
|
42
|
+
log = logging.getLogger(__name__)
|
43
|
+
|
44
|
+
|
45
|
+
@app.command(help="""Start the Proscenium Bot.""")
def start(
    verbose: bool = False,
    production_module_name: str = typer.Option(
        "demo.production",
        "-p",
        "--production",
        help="The name of the python module in PYTHONPATH that defines make_production(admin_channel_id, console), returning a proscenium.core.Production.",
    ),
    force_rebuild: bool = False,
):
    """Build the production's props, connect to Slack, and serve until interrupted.

    Args:
        verbose: Raise this module's, proscenium's and the production
            package's loggers to INFO and hand the console to the production.
        production_module_name: Importable module exposing
            ``make_production(admin_channel_id, console)``.
        force_rebuild: Rebuild every prop even if it reports being built.

    Raises:
        ValueError: If SLACK_ADMIN_CHANNEL_ID is unset or names a channel the
            bot is not subscribed to (other ValueErrors may come from
            ``get_slack_auth``).
    """
    console = Console()
    sub_console = None

    if verbose:
        log.setLevel(logging.INFO)
        logging.getLogger("proscenium").setLevel(logging.INFO)
        # Top-level package of the production module ("demo" for the default),
        # instead of a hard-coded "demo" that ignores --production.
        production_package = production_module_name.split(".")[0]
        logging.getLogger(production_package).setLevel(logging.INFO)
        sub_console = console

    console.print(header())

    production_module = importlib.import_module(production_module_name, package=None)

    slack_admin_channel_id = os.environ.get("SLACK_ADMIN_CHANNEL_ID")
    # Note that the checking of the existence of the admin channel id is delayed
    # until after the subscribed channels are shown.

    production = production_module.make_production(slack_admin_channel_id, sub_console)

    console.print("Preparing props...")
    # The flag was previously accepted on the command line but never forwarded.
    production.prepare_props(force_rebuild=force_rebuild)
    console.print("Props are up-to-date.")

    slack_app_token, slack_bot_token = get_slack_auth()

    socket_mode_client = connect(slack_app_token, slack_bot_token)

    user_id = bot_user_id(socket_mode_client, console)
    console.print()

    channels_by_id, channel_name_to_id = channel_maps(socket_mode_client)
    console.print(channel_table(channels_by_id))
    console.print()

    if slack_admin_channel_id is None:
        raise ValueError(
            "SLACK_ADMIN_CHANNEL_ID environment variable not set. "
            "Please set it to the channel ID of the Proscenium admin channel."
        )
    if slack_admin_channel_id not in channels_by_id:
        raise ValueError(
            f"Admin channel {slack_admin_channel_id} not found in subscribed channels."
        )

    admin = Admin(slack_admin_channel_id)
    log.info("Admin handler started.")

    log.info("Places, please!")
    channel_id_to_character = production.places(channel_name_to_id)
    # The admin character always owns the admin channel.
    channel_id_to_character[slack_admin_channel_id] = admin

    console.print(places_table(channel_id_to_character, channels_by_id))
    console.print()

    slack_listener = make_slack_listener(
        user_id,
        slack_admin_channel_id,
        channels_by_id,
        channel_id_to_character,
        console,
    )

    send_curtain_up(socket_mode_client, production, slack_admin_channel_id)

    console.print("Starting the show. Listening for events...")
    # Blocks until KeyboardInterrupt.
    listen(
        socket_mode_client,
        slack_listener,
        user_id,
        console,
    )

    send_curtain_down(socket_mode_client, slack_admin_channel_id)

    shutdown(
        socket_mode_client,
        slack_listener,
        user_id,
        production,
        console,
    )
|
138
|
+
|
139
|
+
|
140
|
+
if __name__ == "__main__":
|
141
|
+
|
142
|
+
app()
|
@@ -0,0 +1,152 @@
|
|
1
|
+
from typing import Generator
|
2
|
+
from typing import Optional
|
3
|
+
import logging
|
4
|
+
from rich.console import Console
|
5
|
+
|
6
|
+
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
7
|
+
|
8
|
+
log = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
class Prop:
    """
    A `Prop` is a resource available to the `Character`s in a `Scene`.
    """

    # NOTE: the class docstring above is runtime text (see `description`).

    def __init__(
        self,
        console: Optional[Console] = None,
    ):
        # Optional rich console kept for subclasses to report on.
        self.console = console

    def name(self) -> str:
        """Return the name of the concrete class."""
        return type(self).__name__

    def description(self) -> str:
        """Return the class docstring, or an empty string when there is none."""
        doc = self.__doc__
        return doc if doc else ""

    def curtain_up_message(self) -> str:
        """Return a one-item bullet: ``- <name>, <stripped description>``."""
        label = self.name()
        summary = self.description().strip()
        return "- " + label + ", " + summary

    def already_built(self) -> bool:
        """Report whether the prop is already built; the base answer is no."""
        return False

    def build(self) -> None:
        """Build the prop; the base implementation does nothing."""
        return None
|
36
|
+
|
37
|
+
|
38
|
+
class Character:
    """
    A `Character` is a participant in a `Scene` that `handle`s utterances from the
    scene by producing its own utterances."""

    # NOTE: the class docstring above is runtime text (see `description`).

    def __init__(self, admin_channel_id: str):
        self.admin_channel_id = admin_channel_id

    def name(self) -> str:
        """Return the name of the concrete class."""
        return self.__class__.__name__

    def description(self) -> str:
        """Return the class docstring, or an empty string when there is none."""
        return self.__doc__ or ""

    def curtain_up_message(self) -> str:
        """Return a one-item bullet: ``- <name>, <stripped description>``."""
        return f"- {self.name()}, {self.description().strip()}"

    def handle(
        self, channel_id: str, speaker_id: str, utterance: str
    ) -> Generator[tuple[str, str], None, None]:
        """Respond to an utterance; subclasses override this.

        Two defects of the original are fixed here: `self` was missing from
        the signature, so calling the method on an instance raised TypeError,
        and the body returned None although callers iterate the result.

        Yields:
            `(receiving_channel_id, text)` pairs. The base implementation
            yields nothing.
        """
        # Makes this a generator function that produces no items.
        yield from ()
|
59
|
+
|
60
|
+
|
61
|
+
class Scene:
    """
    A `Scene` is a setting in which `Character`s interact with each other and
    with `Prop`s. It is a container for `Character`s and `Prop`s.
    """

    # NOTE: the class docstring above is runtime text (see `description`).

    def __init__(self):
        pass

    def name(self) -> str:
        """Return the name of the concrete class."""
        return type(self).__name__

    def description(self) -> str:
        """Return the class docstring, or an empty string when there is none."""
        doc = self.__doc__
        return doc if doc else ""

    def curtain_up_message(self) -> str:
        """Return a multi-line summary of the scene, its characters and props."""
        character_lines = []
        for character in self.characters():
            character_lines.append(character.curtain_up_message())
        characters_msg = "\n".join(character_lines)

        prop_lines = []
        for prop in self.props():
            prop_lines.append(prop.curtain_up_message())
        props_msg = "\n".join(prop_lines)

        return f"""
Scene: {self.name()}, {self.description().strip()}

Characters:
{characters_msg}

Props:
{props_msg}
"""

    def props(self) -> list[Prop]:
        """Return the scene's props; none by default."""
        return []

    def prepare_props(self, force_rebuild: bool = False) -> None:
        """Build every prop that needs it.

        With `force_rebuild` each prop is built unconditionally; otherwise
        only props whose `already_built()` is false are built (and logged).
        """
        for prop in self.props():
            if not force_rebuild:
                if prop.already_built():
                    continue
                log.info("Prop %s not built. Building it now.", prop.name())
            prop.build()

    def characters(self) -> list[Character]:
        """Return the scene's characters; none by default."""
        return []

    def places(self) -> dict[str, Character]:
        """Placeholder for subclasses; the base implementation returns None."""
        pass

    def curtain(self) -> None:
        """Hook run when the scene ends; does nothing by default."""
        pass
|
113
|
+
|
114
|
+
|
115
|
+
class Production:
    """
    A `Production` is a collection of `Scene`s."""

    # NOTE: the class docstring above is runtime text (see `description`).

    def __init__(self):
        pass

    def name(self) -> str:
        """Return the name of the concrete class."""
        return self.__class__.__name__

    def description(self) -> str:
        """Return the class docstring, or an empty string when there is none."""
        return self.__doc__ or ""

    def prepare_props(self, force_rebuild: bool = False) -> None:
        """Prepare the props of every scene, optionally forcing a rebuild."""
        if force_rebuild:
            log.info("Forcing rebuild of all props.")
        else:
            log.info("Building any missing props...")

        for scene in self.scenes():
            scene.prepare_props(force_rebuild=force_rebuild)

    def curtain_up_message(self) -> str:
        """Return the production header followed by each scene's summary."""
        scenes_msg = "\n\n".join(
            [scene.curtain_up_message() for scene in self.scenes()]
        )

        return f"""Production: {self.name()}, {self.description().strip()}

{scenes_msg}"""

    def scenes(self) -> list[Scene]:
        """Return the production's scenes; none by default."""
        return []

    def places(self, channel_name_to_id: dict[str, str]) -> dict[str, Character]:
        """Map channel ids to the characters that handle them.

        Added so the base class matches how the bot entry point uses a
        production (`production.places(channel_name_to_id)`, then item
        assignment on the result). Subclasses override this; the base
        implementation places no characters.

        Args:
            channel_name_to_id: Channel name to channel id lookup.

        Returns:
            A new, empty, mutable mapping.
        """
        return {}

    def curtain(self) -> None:
        """Bring the curtain down on every scene."""
        for scene in self.scenes():
            scene.curtain()
|
@@ -0,0 +1,265 @@
|
|
1
|
+
from typing import Callable
|
2
|
+
|
3
|
+
from typing import Generator
|
4
|
+
import time
|
5
|
+
import logging
|
6
|
+
import os
|
7
|
+
from rich.console import Console
|
8
|
+
from rich.table import Table
|
9
|
+
|
10
|
+
from slack_sdk.web import WebClient
|
11
|
+
from slack_sdk.socket_mode import SocketModeClient
|
12
|
+
from slack_sdk.socket_mode.request import SocketModeRequest
|
13
|
+
from slack_sdk.socket_mode.response import SocketModeResponse
|
14
|
+
from slack_sdk.socket_mode.listeners import SocketModeRequestListener
|
15
|
+
|
16
|
+
from proscenium.core import Production
|
17
|
+
from proscenium.core import Character
|
18
|
+
|
19
|
+
log = logging.getLogger(__name__)
|
20
|
+
|
21
|
+
|
22
|
+
def get_slack_auth() -> tuple[str, str]:
    """Read the Slack app and bot tokens from the environment.

    Returns:
        `(app_token, bot_token)` taken from SLACK_APP_TOKEN and
        SLACK_BOT_TOKEN.

    Raises:
        ValueError: If either variable is unset. The app token is checked
            first.
    """
    environment = os.environ

    app_token = environment.get("SLACK_APP_TOKEN")
    if app_token is None:
        app_token_problem = (
            "SLACK_APP_TOKEN environment variable not set. "
            "Please set it to the app token of the Proscenium Slack app."
        )
        raise ValueError(app_token_problem)

    bot_token = environment.get("SLACK_BOT_TOKEN")
    if bot_token is None:
        bot_token_problem = (
            "SLACK_BOT_TOKEN environment variable not set. "
            "Please set it to the bot token of the Proscenium Slack app."
        )
        raise ValueError(bot_token_problem)

    return app_token, bot_token
|
38
|
+
|
39
|
+
|
40
|
+
def connect(app_token: str, bot_token: str) -> SocketModeClient:
    """Create a Socket Mode client and open its connection.

    Args:
        app_token: Token passed to the `SocketModeClient`.
        bot_token: Token passed to the `WebClient` the socket client wraps.

    Returns:
        The client, after `connect()` has been called on it.
    """
    client = SocketModeClient(
        app_token=app_token,
        web_client=WebClient(token=bot_token),
    )
    client.connect()
    log.info("Connected to Slack.")
    return client
|
49
|
+
|
50
|
+
|
51
|
+
def make_slack_listener(
    proscenium_user_id: str,
    admin_channel_id: str,
    channels_by_id: dict,
    channel_id_to_handler: dict[
        str, Callable[[str, str, str], Generator[tuple[str, str], None, None]]
    ],
    console: Console,
):
    """Build the Socket Mode request listener that routes messages to characters.

    Args:
        proscenium_user_id: The bot's own user id; its messages are ignored.
        admin_channel_id: Channel that receives a permalink to every response
            posted in another channel.
        channels_by_id: Channels known at startup, keyed by channel id.
        channel_id_to_handler: Per-channel handler. Each value is used as an
            object with a `handle(channel_id, speaker_id, text)` method.
        console: Console used to note incoming messages.

    Returns:
        A `process(client, req)` callable to register as a listener.
    """

    def process(client: SocketModeClient, req: SocketModeRequest):

        if req.type == "events_api":

            event = req.payload["event"]

            # Acknowledge before doing any work.
            response = SocketModeResponse(envelope_id=req.envelope_id)
            client.send_socket_mode_response(response)

            if event.get("type") not in ("message", "app_mention"):
                return

            speaker_id = event.get("user")
            if speaker_id == proscenium_user_id:
                # Never react to the bot's own messages.
                return

            text = event.get("text")
            channel_id = event.get("channel")
            console.print(f"{speaker_id} in {channel_id} said something")

            if channels_by_id.get(channel_id) is None:
                # TODO: channels_by_id will get stale
                log.info("No handler for channel id %s", channel_id)
                return

            # A subscribed channel may have no character placed in it. The
            # original indexed the mapping directly and raised KeyError here.
            character = channel_id_to_handler.get(channel_id)
            if character is None:
                log.info("No character placed in channel id %s", channel_id)
                return
            log.info("Handler defined for channel id %s", channel_id)

            # TODO determine whether the handler has a good chance of being useful

            for receiving_channel_id, response in character.handle(
                channel_id, speaker_id, text
            ):
                response_response = client.web_client.chat_postMessage(
                    channel=receiving_channel_id, text=response
                )
                log.info(
                    "Response sent to channel %s",
                    receiving_channel_id,
                )
                if receiving_channel_id == admin_channel_id:
                    # Responses already in the admin channel need no link.
                    continue

                permalink = client.web_client.chat_getPermalink(
                    channel=receiving_channel_id,
                    message_ts=response_response["ts"],
                )["permalink"]
                log.info(
                    "Response sent to channel %s link %s",
                    receiving_channel_id,
                    permalink,
                )
                client.web_client.chat_postMessage(
                    channel=admin_channel_id,
                    text=permalink,
                )

        elif req.type == "interactive":
            pass
        elif req.type == "slash_commands":
            pass
        elif req.type == "app_home_opened":
            pass
        elif req.type == "block_actions":
            pass
        elif req.type == "message_actions":
            pass

    return process
|
135
|
+
|
136
|
+
|
137
|
+
def channel_maps(
    socket_mode_client: SocketModeClient,
) -> tuple[dict[str, dict], dict[str, str]]:
    """Fetch the bot's conversations and index them by id and by name.

    Returns:
        `(channels_by_id, channel_name_to_id)`. Conversations without a
        truthy "name" are left out of the second mapping.
    """
    # NOTE(review): one request with limit=100 and no cursor handling here;
    # confirm whether more than 100 conversations must be supported.
    response = socket_mode_client.web_client.users_conversations(
        types="public_channel,private_channel,mpim,im",
        limit=100,
    )
    channels = response["channels"]
    log.info("Subscribed channels count: %s", len(channels))

    channels_by_id = {}
    for channel in channels:
        channels_by_id[channel["id"]] = channel

    channel_name_to_id = {}
    for channel in channels_by_id.values():
        if channel.get("name"):
            channel_name_to_id[channel["name"]] = channel["id"]

    return channels_by_id, channel_name_to_id
|
161
|
+
|
162
|
+
|
163
|
+
def channel_table(channels_by_id) -> Table:
    """Render the subscribed channels as a two-column table (id, name).

    A channel without a "name" key is shown as "-".
    """
    table = Table(title="Subscribed channels")
    for heading in ("Channel ID", "Name"):
        table.add_column(heading, justify="left")

    for channel_id in channels_by_id:
        channel = channels_by_id[channel_id]
        table.add_row(channel_id, channel.get("name", "-"))

    return table
|
173
|
+
|
174
|
+
|
175
|
+
def bot_user_id(socket_mode_client: SocketModeClient, console: Console):
    """Print the bot's identity from `auth_test` and return its user id.

    Args:
        socket_mode_client: Client whose `web_client.auth_test()` is called.
        console: Console that receives the workspace URL, team, user and
            bot id lines.

    Returns:
        The "user_id" value of the auth response.
    """
    auth_response = socket_mode_client.web_client.auth_test()

    console.print(auth_response["url"])
    console.print()

    # Values are pulled out first: the original nested double-quoted
    # subscripts inside double-quoted f-strings, which only parses on
    # Python 3.12+ and is a SyntaxError on earlier versions.
    team, team_id = auth_response["team"], auth_response["team_id"]
    user, user_id = auth_response["user"], auth_response["user_id"]
    console.print(f"Team '{team}' ({team_id})")
    console.print(f"User '{user}' ({user_id})")

    console.print("Bot id", auth_response["bot_id"])

    return user_id
|
188
|
+
|
189
|
+
|
190
|
+
def places_table(
    channel_id_to_character: dict[str, Character], channels_by_id: dict[str, dict]
) -> Table:
    """Render which character is placed in which channel.

    Args:
        channel_id_to_character: Character per channel id.
        channels_by_id: Channel records keyed by channel id.

    Returns:
        A table with channel id, channel name and character name columns.
    """
    table = Table(title="Characters in place")
    table.add_column("Channel ID", justify="left")
    table.add_column("Channel Name", justify="left")
    table.add_column("Character", justify="left")
    for channel_id, character in channel_id_to_character.items():
        # Tolerate unknown channels and channels without a name, showing "-"
        # as `channel_table` does; the original raised KeyError for both.
        channel = channels_by_id.get(channel_id, {})
        table.add_row(channel_id, channel.get("name", "-"), character.name())

    return table
|
203
|
+
|
204
|
+
|
205
|
+
def send_curtain_up(
    socket_mode_client: SocketModeClient,
    production: Production,
    slack_admin_channel_id: str,
) -> None:
    """Post the production's curtain-up summary to the admin channel."""
    production_summary = production.curtain_up_message()

    # The summary is wrapped in a fenced block inside the posted message.
    curtain_up_message = f"""
Proscenium 🎭 https://the-ai-alliance.github.io/proscenium/

```
{production_summary}
```

Curtain up.
"""

    web_client = socket_mode_client.web_client
    web_client.chat_postMessage(
        channel=slack_admin_channel_id,
        text=curtain_up_message,
    )
|
225
|
+
|
226
|
+
|
227
|
+
def listen(
    socket_mode_client: SocketModeClient,
    slack_listener: SocketModeRequestListener,
    user_id: str,
    console: Console,
):
    """Register the listener and block until interrupted from the keyboard.

    The calling thread only sleeps in one-second steps; `user_id` is accepted
    but not used here.
    """
    listeners = socket_mode_client.socket_mode_request_listeners
    listeners.append(slack_listener)

    try:
        # Idle until Ctrl-C.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        console.print("Exiting...")
|
240
|
+
|
241
|
+
|
242
|
+
def send_curtain_down(
    socket_mode_client: SocketModeClient, slack_admin_channel_id: str
) -> None:
    """Post the closing message to the admin channel."""
    farewell = """Curtain down. We hope you enjoyed the show!"""
    web_client = socket_mode_client.web_client
    web_client.chat_postMessage(channel=slack_admin_channel_id, text=farewell)
|
249
|
+
|
250
|
+
|
251
|
+
def shutdown(
    socket_mode_client: SocketModeClient,
    slack_listener: SocketModeRequestListener,
    user_id: str,
    production: Production,
    console: Console,
):
    """Unregister the listener, disconnect from Slack, then close the production.

    `user_id` is accepted but not used here.
    """
    listeners = socket_mode_client.socket_mode_request_listeners
    listeners.remove(slack_listener)

    socket_mode_client.disconnect()
    console.print("Disconnected from Slack.")

    # Let every scene run its curtain hook after the connection is gone.
    production.curtain()
    console.print("Handlers stopped.")
|
@@ -0,0 +1,51 @@
|
|
1
|
+
from typing import Optional
|
2
|
+
import logging
|
3
|
+
from rich.console import Console
|
4
|
+
from pymilvus import model
|
5
|
+
|
6
|
+
from proscenium.verbs.read import load_file
|
7
|
+
from proscenium.verbs.chunk import documents_to_chunks_by_characters
|
8
|
+
from proscenium.verbs.display.milvus import collection_panel
|
9
|
+
from proscenium.verbs.vector_database import vector_db
|
10
|
+
from proscenium.verbs.vector_database import create_collection
|
11
|
+
from proscenium.verbs.vector_database import add_chunks_to_vector_db
|
12
|
+
|
13
|
+
log = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
def load_chunks_from_files(
    data_files: list[str],
    milvus_uri: str,
    embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
    collection_name: str,
    console: Optional[Console] = None,
) -> None:
    """Chunk each data file and insert the chunks into a vector db collection.

    Args:
        data_files: Files to load, processed in order.
        milvus_uri: Location passed to `vector_db` to open the client.
        embedding_fn: Embedding function used for the collection and chunks.
        collection_name: Collection that receives the chunks.
        console: When given, a panel for the collection is printed after
            each file.
    """
    vector_db_client = vector_db(milvus_uri)
    log.info("Vector db stored at %s", milvus_uri)

    # Close the client even when loading, chunking or inserting fails; the
    # original only closed it on the success path.
    try:
        for data_file in data_files:

            log.info(
                "Loading data file %s into vector db %s collection %s",
                data_file,
                milvus_uri,
                collection_name,
            )
            create_collection(vector_db_client, embedding_fn, collection_name)

            documents = load_file(data_file)
            chunks = documents_to_chunks_by_characters(documents)
            log.info("Data file %s has %s chunks", data_file, len(chunks))

            info = add_chunks_to_vector_db(
                vector_db_client,
                embedding_fn,
                chunks,
                collection_name,
            )
            log.info("%s chunks inserted ", info["insert_count"])
            if console is not None:
                console.print(collection_panel(vector_db_client, collection_name))
    finally:
        vector_db_client.close()
|
@@ -15,6 +15,8 @@ from langchain_core.documents.base import Document
|
|
15
15
|
from proscenium.verbs.chunk import documents_to_chunks_by_tokens
|
16
16
|
from proscenium.verbs.extract import extract_to_pydantic_model
|
17
17
|
|
18
|
+
log = logging.getLogger(__name__)
|
19
|
+
|
18
20
|
|
19
21
|
def extract_from_document_chunks(
|
20
22
|
doc: Document,
|
@@ -42,7 +44,7 @@ def extract_from_document_chunks(
|
|
42
44
|
chunk.page_content,
|
43
45
|
)
|
44
46
|
|
45
|
-
|
47
|
+
log.info("Extract model in chunk %s of %s", i + 1, len(chunks))
|
46
48
|
if console is not None:
|
47
49
|
console.print(Panel(str(ce)))
|
48
50
|
|
@@ -79,4 +81,4 @@ def enrich_documents(
|
|
79
81
|
|
80
82
|
progress.update(task_enrich, advance=1)
|
81
83
|
|
82
|
-
|
84
|
+
log.info("Wrote document enrichments to %s", enrichments_jsonl_file)
|
@@ -14,6 +14,8 @@ from proscenium.verbs.vector_database import add_chunks_to_vector_db
|
|
14
14
|
from proscenium.verbs.vector_database import embedding_function
|
15
15
|
from proscenium.verbs.display.milvus import collection_panel
|
16
16
|
|
17
|
+
log = logging.getLogger(__name__)
|
18
|
+
|
17
19
|
|
18
20
|
class Resolver:
|
19
21
|
|
@@ -36,13 +38,13 @@ def load_entity_resolver(
|
|
36
38
|
console: Optional[Console] = None,
|
37
39
|
) -> None:
|
38
40
|
|
39
|
-
vector_db_client = vector_db(milvus_uri
|
40
|
-
|
41
|
+
vector_db_client = vector_db(milvus_uri)
|
42
|
+
log.info("Vector db stored at %s", milvus_uri)
|
41
43
|
|
42
|
-
|
44
|
+
embedding_fn = embedding_function(embedding_model_id)
|
45
|
+
log.info("Embedding model %s", embedding_model_id)
|
43
46
|
|
44
|
-
|
45
|
-
logging.info("Embedding model %s", embedding_model_id)
|
47
|
+
for resolver in resolvers:
|
46
48
|
|
47
49
|
values = []
|
48
50
|
with driver.session() as session:
|
@@ -50,16 +52,14 @@ def load_entity_resolver(
|
|
50
52
|
new_values = [Document(record[resolver.field_name]) for record in result]
|
51
53
|
values.extend(new_values)
|
52
54
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
create_collection(
|
57
|
-
vector_db_client, embedding_fn, resolver.collection_name, overwrite=True
|
58
|
-
)
|
55
|
+
log.info("Loading entity resolver into vector db %s", resolver.collection_name)
|
56
|
+
create_collection(vector_db_client, embedding_fn, resolver.collection_name)
|
57
|
+
|
59
58
|
info = add_chunks_to_vector_db(
|
60
59
|
vector_db_client, embedding_fn, values, resolver.collection_name
|
61
60
|
)
|
62
|
-
|
61
|
+
log.info("%s chunks inserted ", info["insert_count"])
|
62
|
+
|
63
63
|
if console is not None:
|
64
64
|
console.print(collection_panel(vector_db_client, resolver.collection_name))
|
65
65
|
|
@@ -72,10 +72,10 @@ def find_matching_objects(
|
|
72
72
|
resolver: Resolver,
|
73
73
|
) -> Optional[str]:
|
74
74
|
|
75
|
-
|
75
|
+
log.info("Loading collection", resolver.collection_name)
|
76
76
|
vector_db_client.load_collection(resolver.collection_name)
|
77
77
|
|
78
|
-
|
78
|
+
log.info(
|
79
79
|
"Finding entity matches for", approximate, "using", resolver.collection_name
|
80
80
|
)
|
81
81
|
|
@@ -88,8 +88,8 @@ def find_matching_objects(
|
|
88
88
|
)
|
89
89
|
# TODO apply distance threshold
|
90
90
|
for match in [head["entity"]["text"] for head in hits[:1]]:
|
91
|
-
|
91
|
+
log.info("Closest match:", match)
|
92
92
|
return match
|
93
93
|
|
94
|
-
|
94
|
+
log.info("No match found")
|
95
95
|
return None
|
@@ -9,6 +9,8 @@ from pydantic import BaseModel
|
|
9
9
|
from uuid import uuid4, UUID
|
10
10
|
from neo4j import Driver
|
11
11
|
|
12
|
+
log = logging.getLogger(__name__)
|
13
|
+
|
12
14
|
|
13
15
|
def query_to_prompts(
|
14
16
|
query: str,
|
@@ -28,28 +30,31 @@ def query_to_prompts(
|
|
28
30
|
[BaseModel], tuple[str, str]
|
29
31
|
], # Context -> (system_prompt, user_prompt)
|
30
32
|
console: Optional[Console] = None,
|
31
|
-
) -> str:
|
33
|
+
) -> Optional[tuple[str, str]]:
|
32
34
|
|
33
35
|
query_id = uuid4()
|
34
36
|
|
35
|
-
|
37
|
+
log.info("Extracting information from the question")
|
36
38
|
|
37
39
|
extract = query_extract(query, query_extraction_model_id)
|
38
40
|
if extract is None:
|
39
|
-
|
41
|
+
log.info("Unable to extract information from that question")
|
40
42
|
return None
|
41
43
|
|
42
|
-
|
44
|
+
log.info("Extract: %s", extract)
|
43
45
|
|
44
|
-
|
46
|
+
log.info("Storing the extracted information in the graph")
|
45
47
|
query_extract_to_graph(query, query_id, extract, driver)
|
46
48
|
|
47
|
-
|
49
|
+
log.info("Forming context from the extracted information")
|
48
50
|
context = query_extract_to_context(
|
49
51
|
extract, query, driver, milvus_uri, console=console
|
50
52
|
)
|
53
|
+
if context is None:
|
54
|
+
log.info("Unable to form context from the extracted information")
|
55
|
+
return None
|
51
56
|
|
52
|
-
|
57
|
+
log.info("Context: %s", context)
|
53
58
|
|
54
59
|
prompts = context_to_prompts(context)
|
55
60
|
|
@@ -9,6 +9,8 @@ from rich.progress import Progress
|
|
9
9
|
|
10
10
|
from neo4j import Driver
|
11
11
|
|
12
|
+
log = logging.getLogger(__name__)
|
13
|
+
|
12
14
|
|
13
15
|
def load_knowledge_graph(
|
14
16
|
driver: Driver,
|
@@ -17,7 +19,7 @@ def load_knowledge_graph(
|
|
17
19
|
doc_enrichments_to_graph: Callable[[Any, BaseModel], None],
|
18
20
|
) -> None:
|
19
21
|
|
20
|
-
|
22
|
+
log.info("Parsing enrichments from %s", enrichments_jsonl_file)
|
21
23
|
|
22
24
|
enrichmentss = []
|
23
25
|
with open(enrichments_jsonl_file, "r") as f:
|
@@ -1,8 +1,6 @@
|
|
1
|
-
from typing import List, Dict
|
1
|
+
from typing import List, Dict
|
2
2
|
import logging
|
3
3
|
|
4
|
-
from rich.console import Console
|
5
|
-
|
6
4
|
from pymilvus import MilvusClient
|
7
5
|
from pymilvus import model
|
8
6
|
|
@@ -10,6 +8,7 @@ from proscenium.verbs.complete import complete_simple
|
|
10
8
|
from proscenium.verbs.display.milvus import chunk_hits_table
|
11
9
|
from proscenium.verbs.vector_database import closest_chunks
|
12
10
|
|
11
|
+
log = logging.getLogger(__name__)
|
13
12
|
|
14
13
|
rag_system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer."
|
15
14
|
|
@@ -44,16 +43,15 @@ def answer_question(
|
|
44
43
|
vector_db_client: MilvusClient,
|
45
44
|
embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
|
46
45
|
collection_name: str,
|
47
|
-
console: Optional[Console] = None,
|
48
46
|
) -> str:
|
49
47
|
|
50
48
|
chunks = closest_chunks(vector_db_client, embedding_fn, query, collection_name)
|
51
|
-
|
52
|
-
|
49
|
+
log.info("Found %s closest chunks", len(chunks))
|
50
|
+
log.info(chunk_hits_table(chunks))
|
53
51
|
|
54
52
|
prompt = rag_prompt(chunks, query)
|
55
|
-
|
53
|
+
log.info("RAG prompt created. Calling inference at %s", model_id)
|
56
54
|
|
57
|
-
answer = complete_simple(model_id, rag_system_prompt, prompt
|
55
|
+
answer = complete_simple(model_id, rag_system_prompt, prompt)
|
58
56
|
|
59
57
|
return answer
|
@@ -11,6 +11,8 @@ from proscenium.verbs.complete import (
|
|
11
11
|
complete_with_tool_results,
|
12
12
|
)
|
13
13
|
|
14
|
+
log = logging.getLogger(__name__)
|
15
|
+
|
14
16
|
|
15
17
|
def apply_tools(
|
16
18
|
model_id: str,
|
@@ -43,7 +45,7 @@ def apply_tools(
|
|
43
45
|
)
|
44
46
|
)
|
45
47
|
|
46
|
-
|
48
|
+
log.info("No tool applications detected")
|
47
49
|
|
48
50
|
return tool_call_message.content
|
49
51
|
|
proscenium/verbs/__init__.py
CHANGED
proscenium/verbs/chunk.py
CHANGED
@@ -8,6 +8,8 @@ from langchain_core.documents.base import Document
|
|
8
8
|
from langchain.text_splitter import CharacterTextSplitter
|
9
9
|
from langchain.text_splitter import TokenTextSplitter
|
10
10
|
|
11
|
+
log = logging.getLogger(__name__)
|
12
|
+
|
11
13
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
12
14
|
logging.getLogger("langchain_text_splitters.base").setLevel(logging.ERROR)
|
13
15
|
|
proscenium/verbs/complete.py
CHANGED
@@ -53,6 +53,8 @@ from aisuite.framework.message import ChatCompletionMessageToolCall
|
|
53
53
|
|
54
54
|
from proscenium.verbs.display.tools import complete_with_tools_panel
|
55
55
|
|
56
|
+
log = logging.getLogger(__name__)
|
57
|
+
|
56
58
|
provider_configs = {
|
57
59
|
# TODO expose this
|
58
60
|
"ollama": {"timeout": 180},
|
@@ -111,11 +113,11 @@ def evaluate_tool_call(tool_map: dict, tool_call: ChatCompletionMessageToolCall)
|
|
111
113
|
# TODO validate the arguments?
|
112
114
|
function_args = json.loads(tool_call.function.arguments)
|
113
115
|
|
114
|
-
|
116
|
+
log.info(f"Evaluating tool call: {function_name} with args {function_args}")
|
115
117
|
|
116
118
|
function_response = tool_map[function_name](**function_args)
|
117
119
|
|
118
|
-
|
120
|
+
log.info(f" Response: {function_response}")
|
119
121
|
|
120
122
|
return function_response
|
121
123
|
|
@@ -136,7 +138,7 @@ def evaluate_tool_calls(tool_call_message, tool_map: dict) -> list[dict]:
|
|
136
138
|
|
137
139
|
tool_call: ChatCompletionMessageToolCall
|
138
140
|
|
139
|
-
|
141
|
+
log.info("Evaluating tool calls")
|
140
142
|
|
141
143
|
new_messages: list[dict] = []
|
142
144
|
|
@@ -144,7 +146,7 @@ def evaluate_tool_calls(tool_call_message, tool_map: dict) -> list[dict]:
|
|
144
146
|
function_response = evaluate_tool_call(tool_map, tool_call)
|
145
147
|
new_messages.append(tool_response_message(tool_call, function_response))
|
146
148
|
|
147
|
-
|
149
|
+
log.info("Tool calls evaluated")
|
148
150
|
|
149
151
|
return new_messages
|
150
152
|
|
proscenium/verbs/display.py
CHANGED
proscenium/verbs/extract.py
CHANGED
@@ -8,6 +8,8 @@ from pydantic import BaseModel
|
|
8
8
|
|
9
9
|
from proscenium.verbs.complete import complete_simple
|
10
10
|
|
11
|
+
log = logging.getLogger(__name__)
|
12
|
+
|
11
13
|
extraction_system_prompt = "You are an entity extractor"
|
12
14
|
|
13
15
|
|
@@ -52,12 +54,12 @@ def extract_to_pydantic_model(
|
|
52
54
|
console=console,
|
53
55
|
)
|
54
56
|
|
55
|
-
|
57
|
+
log.info("complete_to_pydantic_model: extract_str = <<<%s>>>", extract_str)
|
56
58
|
|
57
59
|
try:
|
58
60
|
extract_dict = json.loads(extract_str)
|
59
61
|
return clazz.model_construct(**extract_dict)
|
60
62
|
except Exception as e:
|
61
|
-
|
63
|
+
log.error("complete_to_pydantic_model: Exception: %s", e)
|
62
64
|
|
63
65
|
return None
|
proscenium/verbs/invoke.py
CHANGED
proscenium/verbs/read.py
CHANGED
@@ -3,13 +3,18 @@ from typing import List
|
|
3
3
|
import os
|
4
4
|
import logging
|
5
5
|
|
6
|
-
|
6
|
+
import httpx
|
7
|
+
from pydantic.networks import HttpUrl
|
8
|
+
from pathlib import Path
|
7
9
|
|
10
|
+
from langchain_core.documents.base import Document
|
8
11
|
from langchain_community.document_loaders import TextLoader
|
9
12
|
from langchain_community.document_loaders.hugging_face_dataset import (
|
10
13
|
HuggingFaceDatasetLoader,
|
11
14
|
)
|
12
15
|
|
16
|
+
log = logging.getLogger(__name__)
|
17
|
+
|
13
18
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
14
19
|
logging.getLogger("langchain_text_splitters.base").setLevel(logging.ERROR)
|
15
20
|
|
@@ -34,11 +39,6 @@ def load_hugging_face_dataset(
|
|
34
39
|
return documents
|
35
40
|
|
36
41
|
|
37
|
-
import httpx
|
38
|
-
from pydantic.networks import HttpUrl
|
39
|
-
from pathlib import Path
|
40
|
-
|
41
|
-
|
42
42
|
async def url_to_file(url: HttpUrl, data_file: Path, overwrite: bool = False):
|
43
43
|
|
44
44
|
if data_file.exists() and not overwrite:
|
proscenium/verbs/remember.py
CHANGED
@@ -3,12 +3,15 @@ from typing import Dict, List
|
|
3
3
|
import logging
|
4
4
|
from pathlib import Path
|
5
5
|
from langchain_core.documents.base import Document
|
6
|
+
from urllib.parse import urlsplit
|
6
7
|
from pymilvus import MilvusClient
|
7
8
|
from pymilvus import DataType, FieldSchema, CollectionSchema
|
8
9
|
from pymilvus import model
|
9
10
|
|
10
11
|
# See https://milvus.io/docs/quickstart.md
|
11
12
|
|
13
|
+
log = logging.getLogger(__name__)
|
14
|
+
|
12
15
|
|
13
16
|
def embedding_function(
|
14
17
|
embedding_model_id: str,
|
@@ -40,34 +43,28 @@ def schema_chunks(
|
|
40
43
|
return schema
|
41
44
|
|
42
45
|
|
43
|
-
from urllib.parse import urlsplit
|
44
|
-
|
45
|
-
|
46
46
|
def vector_db(
|
47
47
|
uri: str,
|
48
|
-
overwrite: bool = False,
|
49
48
|
) -> MilvusClient:
|
50
49
|
|
50
|
+
log.info("Connecting to vector db %s", uri)
|
51
51
|
uri_fields = urlsplit(uri)
|
52
52
|
client = None
|
53
53
|
if uri_fields[0] == "file":
|
54
54
|
file_path = Path(uri_fields[2][1:])
|
55
55
|
if file_path.exists():
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
logging.info(
|
61
|
-
"Using existing %s file. Use overwrite=True to replace.",
|
62
|
-
uri_fields[2],
|
63
|
-
)
|
56
|
+
log.info(
|
57
|
+
"Using existing %s file.",
|
58
|
+
uri_fields[2],
|
59
|
+
)
|
64
60
|
else:
|
65
|
-
|
61
|
+
log.info("Creating new vector db file %s", file_path)
|
66
62
|
|
67
63
|
client = MilvusClient(uri=str(file_path))
|
68
64
|
|
69
65
|
else:
|
70
66
|
|
67
|
+
log.info("Connecting to vector db at non-file uri %s", uri)
|
71
68
|
client = MilvusClient(uri=uri)
|
72
69
|
|
73
70
|
return client
|
@@ -77,12 +74,8 @@ def create_collection(
|
|
77
74
|
client: MilvusClient,
|
78
75
|
embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
|
79
76
|
collection_name: str,
|
80
|
-
overwrite: bool = True,
|
81
77
|
) -> None:
|
82
78
|
|
83
|
-
if overwrite and client.has_collection(collection_name):
|
84
|
-
client.drop_collection(collection_name)
|
85
|
-
|
86
79
|
client.create_collection(
|
87
80
|
collection_name=collection_name,
|
88
81
|
schema=schema_chunks(embedding_fn),
|
@@ -100,7 +93,7 @@ def create_collection(
|
|
100
93
|
client.create_index(
|
101
94
|
collection_name=collection_name, index_params=index_params, sync=True
|
102
95
|
)
|
103
|
-
|
96
|
+
log.info("Created collection %s", collection_name)
|
104
97
|
|
105
98
|
|
106
99
|
def add_chunks_to_vector_db(
|
proscenium/verbs/write.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: proscenium
|
3
|
-
Version: 0.0.2
|
3
|
+
Version: 0.0.3
|
4
4
|
Summary: Frame AI Agents
|
5
5
|
License: ASFv2
|
6
6
|
Author: Adam Pingel
|
@@ -25,6 +25,7 @@ Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
|
|
25
25
|
Requires-Dist: pymilvus_model (>=0.3.1,<0.4.0)
|
26
26
|
Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
|
27
27
|
Requires-Dist: rich (>=13.9.4,<14.0.0)
|
28
|
+
Requires-Dist: slack_sdk (>=3.35.0,<4.0.0)
|
28
29
|
Requires-Dist: stringcase (>=1.2.0,<2.0.0)
|
29
30
|
Requires-Dist: tiktoken (>=0.9.0,<0.10.0)
|
30
31
|
Requires-Dist: typer (>=0.15.2,<0.16.0)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
proscenium/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
|
2
|
+
proscenium/admin/__init__.py,sha256=VFcFIduWHmcM3idIbWOFGrdxp6oc5ta_ywWIL-hG4Y4,813
|
3
|
+
proscenium/bin/bot.py,sha256=nK4WN8ggpCr_KDpDI16Ib8RljmMpJyZ8qZTW8j7lwP4,3700
|
4
|
+
proscenium/core/__init__.py,sha256=MFOqGAKIK-XjbKaiFLihAX-zrsoRz4uNfCMahC4nJyc,3620
|
5
|
+
proscenium/interfaces/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
|
6
|
+
proscenium/interfaces/slack.py,sha256=kq4jVsTJUUDIrlO54ZIGqTvJoScZS3dcs1fBoNpg-m4,7947
|
7
|
+
proscenium/patterns/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
|
8
|
+
proscenium/patterns/chunk_space.py,sha256=kQzGUtkQKGQGzGjBqS7jz_zr3uvJeiCBD2leflOenM8,1635
|
9
|
+
proscenium/patterns/document_enricher.py,sha256=u-Q4FwvNAFj9nday235jUwxIwizht_VAW8DsmNQmoJs,2374
|
10
|
+
proscenium/patterns/entity_resolver.py,sha256=SOK9WO6WFuhSWPFBy4Hj09E2qgqrzxE0MT65nAgZDUw,2748
|
11
|
+
proscenium/patterns/graph_rag.py,sha256=1HH1xdlAA6ypvYdP4dWFm-KXrGPUmm0T4qIdAU8mgvE,1763
|
12
|
+
proscenium/patterns/knowledge_graph.py,sha256=VLjG8Rp7YfJLZKe9bZt2d4NsGloBV1AYI6SuaQtRLhs,1137
|
13
|
+
proscenium/patterns/rag.py,sha256=zvl_P48F3PDfVMgRXeiClLlevMsPKCMA1teVq9X20OE,1494
|
14
|
+
proscenium/patterns/tools.py,sha256=f2CD6f7CYiSs0Tm1Ff1sOL5Ti6DqJ5HQvMI7NmIgqNs,1740
|
15
|
+
proscenium/verbs/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
|
16
|
+
proscenium/verbs/chunk.py,sha256=hlVHfuR7sEAR8euh3FRd8hb2eJozE7bHe-E0RmAoFP8,1106
|
17
|
+
proscenium/verbs/complete.py,sha256=Y1T49OfAV7K8p0DMzE4aVqtkgVfjUqb6IeOazzdYGow,5071
|
18
|
+
proscenium/verbs/display/__init__.py,sha256=GXuvaMld8tzfJGngHdwVT-YLnuRmW2G0pMdti9Vj53s,238
|
19
|
+
proscenium/verbs/display/chat.py,sha256=2THBUdhG3cIIVZOnJ_AMYL4nWXKFG2cuSkX6wkm48yQ,1148
|
20
|
+
proscenium/verbs/display/milvus.py,sha256=GZze02_ZllukS7zVg0sCPxtjQ4z7O62nFY0Be4d5BX0,2390
|
21
|
+
proscenium/verbs/display/neo4j.py,sha256=yT3hulI1U7s4VmHh_UHHHxN25Q_rRgu6KM2so_pTFUI,727
|
22
|
+
proscenium/verbs/display/tools.py,sha256=eR5g-r7MGKFZY0qg-ndkW3p0mfbupV0UaAUFqJPfnNM,1491
|
23
|
+
proscenium/verbs/display.py,sha256=hHFmktyJtjYLi4I1-8HUfmsuoMTIxc6JFfczASBsCbI,260
|
24
|
+
proscenium/verbs/extract.py,sha256=2JSAblCww5Q2wiFXMib-D1iHKm6a5yj7AUYoyGoLQB8,1615
|
25
|
+
proscenium/verbs/invoke.py,sha256=-Bk7Pp0EEwRTS0MJUlViZeUNo8wxnDKJj5q78KU4CdM,339
|
26
|
+
proscenium/verbs/read.py,sha256=twFtcuyP-y-UwksLmGMCOjMqI7mp--VgvkGDfga6IxA,1262
|
27
|
+
proscenium/verbs/remember.py,sha256=Hh9BDRAYf7MGeMD4MzU73p6Q28KrSiLWPx4GjTW1amQ,296
|
28
|
+
proscenium/verbs/vector_database.py,sha256=U09P7jnpzUDeP7pEgJubf8xQsxC-O8Qb0MS0KY8eoe8,3527
|
29
|
+
proscenium/verbs/write.py,sha256=0GUJuixLnuu_EbFFzAIgrhLEQnOrL0TdUlMiqOl9KtA,367
|
30
|
+
proscenium-0.0.3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
proscenium-0.0.3.dist-info/METADATA,sha256=-CZihFinSAwy-OZz9iGBLKkQRsA3TlyzWy3VYJARvGs,2528
|
32
|
+
proscenium-0.0.3.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
33
|
+
proscenium-0.0.3.dist-info/entry_points.txt,sha256=Q05DVkPq_SjgD8mFN6bG5ae2r_UbsqKCdy2kDAtHYGU,57
|
34
|
+
proscenium-0.0.3.dist-info/RECORD,,
|
proscenium/scripts/__init__.py
DELETED
File without changes
|
@@ -1,39 +0,0 @@
|
|
1
|
-
import logging
|
2
|
-
|
3
|
-
from pymilvus import MilvusClient
|
4
|
-
from pymilvus import model
|
5
|
-
|
6
|
-
from proscenium.verbs.read import load_file
|
7
|
-
from proscenium.verbs.chunk import documents_to_chunks_by_characters
|
8
|
-
from proscenium.verbs.vector_database import create_collection
|
9
|
-
from proscenium.verbs.vector_database import add_chunks_to_vector_db
|
10
|
-
from proscenium.verbs.display.milvus import collection_panel
|
11
|
-
|
12
|
-
|
13
|
-
def make_vector_db_builder(
|
14
|
-
data_files: list[str],
|
15
|
-
vector_db_client: MilvusClient,
|
16
|
-
embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
|
17
|
-
collection_name: str,
|
18
|
-
):
|
19
|
-
|
20
|
-
def build():
|
21
|
-
|
22
|
-
create_collection(
|
23
|
-
vector_db_client, embedding_fn, collection_name, overwrite=True
|
24
|
-
)
|
25
|
-
|
26
|
-
for data_file in data_files:
|
27
|
-
|
28
|
-
documents = load_file(data_file)
|
29
|
-
chunks = documents_to_chunks_by_characters(documents)
|
30
|
-
logging.info("Data file %s has %s chunks", data_file, len(chunks))
|
31
|
-
|
32
|
-
info = add_chunks_to_vector_db(
|
33
|
-
vector_db_client, embedding_fn, chunks, collection_name
|
34
|
-
)
|
35
|
-
logging.info("%s chunks inserted", info["insert_count"])
|
36
|
-
|
37
|
-
logging.info(collection_panel(vector_db_client, collection_name))
|
38
|
-
|
39
|
-
return build
|
File without changes
|
@@ -1,29 +0,0 @@
|
|
1
|
-
proscenium/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
proscenium/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
3
|
-
proscenium/scripts/chunk_space.py,sha256=bLs1BbuLcYKYw-aFK47gC33oTGz94dRBNSeEwEbx2kU,1228
|
4
|
-
proscenium/scripts/document_enricher.py,sha256=STy3G248o6mkBiIxur_oRuxNoCRfUpEIF-fA2iMz9AI,2347
|
5
|
-
proscenium/scripts/entity_resolver.py,sha256=X5mbO2ihx_jnlAdwrd1Dwn0spL0v42Nqjahz5gNjln0,2827
|
6
|
-
proscenium/scripts/graph_rag.py,sha256=b2pPn7mtcl2-Z0tVRVUvK1D8-kW_-NzFbdMZacNd3eg,1612
|
7
|
-
proscenium/scripts/knowledge_graph.py,sha256=-ht8PVDC3gDsAYFVc6amAQxXCF1k6blmQ6jv4iawuuo,1106
|
8
|
-
proscenium/scripts/rag.py,sha256=iJ4tBuOW0Ro_KRzjFhlIEZvBf3JI6PyWgpR7zANS8XM,1572
|
9
|
-
proscenium/scripts/tools.py,sha256=yhO8zyPUb6Jb-5eL9cUGjLgG6BOmxgMQGT2WyVwB2h0,1709
|
10
|
-
proscenium/verbs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
|
-
proscenium/verbs/chunk.py,sha256=-unQS0xgTyTDsikbueNEvPXQhwK26bQYMZBAeAEvVds,1071
|
12
|
-
proscenium/verbs/complete.py,sha256=IA7xHLE0KimcYTtTaPHtfQmDEu9qn5849nVqyJmiTD4,5052
|
13
|
-
proscenium/verbs/display/__init__.py,sha256=GXuvaMld8tzfJGngHdwVT-YLnuRmW2G0pMdti9Vj53s,238
|
14
|
-
proscenium/verbs/display/chat.py,sha256=2THBUdhG3cIIVZOnJ_AMYL4nWXKFG2cuSkX6wkm48yQ,1148
|
15
|
-
proscenium/verbs/display/huggingface.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
|
-
proscenium/verbs/display/milvus.py,sha256=GZze02_ZllukS7zVg0sCPxtjQ4z7O62nFY0Be4d5BX0,2390
|
17
|
-
proscenium/verbs/display/neo4j.py,sha256=yT3hulI1U7s4VmHh_UHHHxN25Q_rRgu6KM2so_pTFUI,727
|
18
|
-
proscenium/verbs/display/tools.py,sha256=eR5g-r7MGKFZY0qg-ndkW3p0mfbupV0UaAUFqJPfnNM,1491
|
19
|
-
proscenium/verbs/display.py,sha256=rCgiLok2EmiHGK8yUBLqxZR148BwroPMqsDf7qQwO1g,210
|
20
|
-
proscenium/verbs/extract.py,sha256=5SMhhcVXYDKSDQH_nKCqFp4eem6YZ-ryGLqRBdAtI80,1588
|
21
|
-
proscenium/verbs/invoke.py,sha256=5szyVIuS3rd2zfyzbWzxVRFr2snf8XDJgzQuhQ85X9Y,289
|
22
|
-
proscenium/verbs/read.py,sha256=EaQkJotIRt9nSqIxaP5NOdfe5YA16uo2fPfwf8ZgHkk,1229
|
23
|
-
proscenium/verbs/remember.py,sha256=AVTVD2Xm0DKAqlhiqHpDf8VFZsoAAgTLqEipZGyqdjo,244
|
24
|
-
proscenium/verbs/vector_database.py,sha256=60I3hjaUgUnE7lEAF-4OZ9-CKKT4JPYntGTQMblMjfU,3761
|
25
|
-
proscenium/verbs/write.py,sha256=MboUV1828XHITMurtddkm6kXkEgvtzfpBmSLh5X_msc,317
|
26
|
-
proscenium-0.0.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
27
|
-
proscenium-0.0.2.dist-info/METADATA,sha256=NtZfBRc4Yx5IYcxfwKxNmIVYdwgwDRZJYpRoQKKoLKQ,2485
|
28
|
-
proscenium-0.0.2.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
|
29
|
-
proscenium-0.0.2.dist-info/RECORD,,
|
File without changes
|