proscenium 0.0.7__py3-none-any.whl → 0.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- proscenium/bin/__init__.py +42 -0
- proscenium/bin/bot.py +13 -95
- proscenium/interfaces/slack.py +73 -1
- proscenium/patterns/rag.py +3 -2
- proscenium/util/__init__.py +3 -6
- {proscenium-0.0.7.dist-info → proscenium-0.0.9.dist-info}/METADATA +2 -1
- proscenium-0.0.9.dist-info/RECORD +25 -0
- proscenium/patterns/chunk_space.py +0 -51
- proscenium/patterns/document_enricher.py +0 -84
- proscenium/patterns/entity_resolver.py +0 -95
- proscenium/patterns/knowledge_graph.py +0 -41
- proscenium/verbs/chunk.py +0 -42
- proscenium/verbs/display/milvus.py +0 -68
- proscenium/verbs/display/neo4j.py +0 -25
- proscenium/verbs/extract.py +0 -65
- proscenium/verbs/read.py +0 -53
- proscenium/verbs/vector_database.py +0 -139
- proscenium/verbs/write.py +0 -14
- proscenium-0.0.7.dist-info/RECORD +0 -35
- {proscenium-0.0.7.dist-info → proscenium-0.0.9.dist-info}/LICENSE +0 -0
- {proscenium-0.0.7.dist-info → proscenium-0.0.9.dist-info}/WHEEL +0 -0
- {proscenium-0.0.7.dist-info → proscenium-0.0.9.dist-info}/entry_points.txt +0 -0
proscenium/bin/__init__.py ADDED

```diff
@@ -0,0 +1,42 @@
+from typing import Callable, Optional
+
+import logging
+import importlib
+import yaml
+from pathlib import Path
+from rich.console import Console
+from proscenium.core import Production
+
+logging.getLogger(__name__).addHandler(logging.NullHandler())
+
+
+def load_config(config_file_name: Path) -> dict:
+
+    if not config_file_name.exists():
+        raise FileNotFoundError(
+            f"Configuration file {config_file_name} not found. "
+            "Please provide a valid configuration file."
+        )
+
+    with open(config_file_name, "r", encoding="utf-8") as f:
+        config = yaml.safe_load(f)
+    return config
+
+
+def production_from_config(
+    config_file_name: Path,
+    get_secret: Callable[[str, str], str],
+    sub_console: Optional[Console] = None,
+) -> tuple[Production, dict]:
+
+    config = load_config(config_file_name)
+
+    production_config = config.get("production", {})
+
+    production_module_name = production_config.get("module", None)
+
+    production_module = importlib.import_module(production_module_name, package=None)
+
+    production = production_module.make_production(config, get_secret, sub_console)
+
+    return production, config
```
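The new `production_from_config` entry point drives everything below. A minimal sketch of calling it, assuming a YAML file whose `production.module` key (read in the hunk above) names an importable module and whose `slack.admin_channel` key (read by `slack_main` further down) names the admin channel; the module and channel names here are hypothetical:

```python
# Sketch only: demo.production and proscenium-admin are hypothetical names.
import os
from pathlib import Path

from proscenium.bin import production_from_config

config_path = Path("demo/demo.yml")
config_path.parent.mkdir(parents=True, exist_ok=True)
config_path.write_text(
    "production:\n"
    "  module: demo.production\n"  # module on PYTHONPATH defining make_production
    "slack:\n"
    "  admin_channel: proscenium-admin\n",
    encoding="utf-8",
)

# get_secret is any (key, default) -> value callable; bot.py passes os.environ.get.
production, config = production_from_config(config_path, os.environ.get)
```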
proscenium/bin/bot.py CHANGED

```diff
@@ -4,32 +4,12 @@ import os
 import sys
 import logging
 import typer
-import
+from pathlib import Path
 from rich.console import Console

-from proscenium.admin import Admin
-
-from proscenium.interfaces.slack import (
-    get_slack_auth,
-    channel_table,
-    bot_user_id,
-    places_table,
-    channel_maps,
-    make_slack_listener,
-    connect,
-    send_curtain_up,
-    listen,
-    send_curtain_down,
-    shutdown,
-)
-
 from proscenium.verbs.display import header
-
-
-    stream=sys.stdout,
-    format="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
-    level=logging.WARNING,
-)
+from proscenium.bin import production_from_config
+from proscenium.interfaces.slack import slack_main

 logging.basicConfig(
     stream=sys.stdout,
@@ -41,17 +21,16 @@ app = typer.Typer(help="Proscenium Bot")

 log = logging.getLogger(__name__)

+default_config_path = Path("demo/demo.yml")
+

 @app.command(help="""Start the Proscenium Bot.""")
 def start(
-
-
-    "
-        "-p",
-        "--production",
-        help="The name of the python module in PYTHONPATH in which the variable production of type proscenium.core.Production is defined.",
+    config_file: Path = typer.Option(
+        default_config_path,
+        help="The name of the Proscenium YAML configuration file.",
     ),
-
+    verbose: bool = False,
 ):

     console = Console()
@@ -65,76 +44,15 @@ def start(

     console.print(header())

-
-
-
-    # Note that the checking of the existence of the admin channel id is delayed
-    # until after the subscribed channels are shown.
-
-    production = production_module.make_production(slack_admin_channel_id, sub_console)
+    production, config = production_from_config(
+        config_file, os.environ.get, sub_console
+    )

     console.print("Preparing props...")
     production.prepare_props()
     console.print("Props are up-to-date.")

-
-
-    socket_mode_client = connect(slack_app_token, slack_bot_token)
-
-    user_id = bot_user_id(socket_mode_client, console)
-    console.print()
-
-    channels_by_id, channel_name_to_id = channel_maps(socket_mode_client)
-    console.print(channel_table(channels_by_id))
-    console.print()
-
-    if slack_admin_channel_id is None:
-        raise ValueError(
-            "SLACK_ADMIN_CHANNEL_ID environment variable not set. "
-            "Please set it to the channel ID of the Proscenium admin channel."
-        )
-    if slack_admin_channel_id not in channels_by_id:
-        raise ValueError(
-            f"Admin channel {slack_admin_channel_id} not found in subscribed channels."
-        )
-
-    admin = Admin(slack_admin_channel_id)
-    log.info("Admin handler started.")
-
-    log.info("Places, please!")
-    channel_id_to_character = production.places(channel_name_to_id)
-    channel_id_to_character[slack_admin_channel_id] = admin
-
-    console.print(places_table(channel_id_to_character, channels_by_id))
-    console.print()
-
-    slack_listener = make_slack_listener(
-        user_id,
-        slack_admin_channel_id,
-        channels_by_id,
-        channel_id_to_character,
-        console,
-    )
-
-    send_curtain_up(socket_mode_client, production, slack_admin_channel_id)
-
-    console.print("Starting the show. Listening for events...")
-    listen(
-        socket_mode_client,
-        slack_listener,
-        user_id,
-        console,
-    )
-
-    send_curtain_down(socket_mode_client, slack_admin_channel_id)
-
-    shutdown(
-        socket_mode_client,
-        slack_listener,
-        user_id,
-        production,
-        console,
-    )
+    slack_main(production, config, console)


 if __name__ == "__main__":
```
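For context, the contract that `production_from_config` imposes on the configured module: it must expose `make_production(config, get_secret, sub_console)` returning a `proscenium.core.Production`. A hypothetical `demo/production.py` sketch, assuming `Production` can be subclassed with the three methods this diff actually calls (`prepare_props`, `places`, `curtain`):

```python
# demo/production.py -- hypothetical; method set inferred from the calls in this diff.
from typing import Callable, Optional

from rich.console import Console

from proscenium.core import Production


class DemoProduction(Production):

    def prepare_props(self) -> None:
        pass  # build indexes, load data, etc.

    def places(self, channel_name_to_id: dict) -> dict:
        return {}  # map channel ids to Character handlers

    def curtain(self) -> None:
        pass  # release clients and other resources


def make_production(
    config: dict,
    get_secret: Callable[[str, str], str],
    sub_console: Optional[Console] = None,
) -> Production:
    return DemoProduction()
```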
proscenium/interfaces/slack.py CHANGED

```diff
@@ -1,6 +1,6 @@
 from typing import Callable
-
 from typing import Generator
+
 import time
 import logging
 import os
@@ -15,6 +15,7 @@ from slack_sdk.socket_mode.listeners import SocketModeRequestListener

 from proscenium.core import Production
 from proscenium.core import Character
+from proscenium.admin import Admin

 log = logging.getLogger(__name__)

@@ -277,3 +278,74 @@ def shutdown(
     production.curtain()

     console.print("Handlers stopped.")
+
+
+def slack_main(
+    production: Production,
+    config: dict,
+    console: Console,
+) -> None:
+
+    slack_app_token, slack_bot_token = get_slack_auth()
+
+    socket_mode_client = connect(slack_app_token, slack_bot_token)
+
+    user_id = bot_user_id(socket_mode_client, console)
+    console.print()
+
+    channels_by_id, channel_name_to_id = channel_maps(socket_mode_client)
+    console.print(channel_table(channels_by_id))
+    console.print()
+
+    slack_admin_channel = config.get("slack", {}).get("admin_channel", None)
+
+    if slack_admin_channel is None:
+        raise ValueError(
+            "slack.admin_channel is not set. "
+            "Please set it to the channel name of the Proscenium admin channel."
+        )
+    slack_admin_channel_id = channel_name_to_id.get(slack_admin_channel, None)
+    if slack_admin_channel_id is None:
+        raise ValueError(
+            f"Admin channel {slack_admin_channel} not found in subscribed channels."
+        )
+
+    admin = Admin(slack_admin_channel_id)
+    log.info(
+        "Admin handler started %s %s.", slack_admin_channel, slack_admin_channel_id
+    )
+
+    log.info("Places, please!")
+    channel_id_to_character = production.places(channel_name_to_id)
+    channel_id_to_character[slack_admin_channel_id] = admin
+
+    console.print(places_table(channel_id_to_character, channels_by_id))
+    console.print()
+
+    slack_listener = make_slack_listener(
+        user_id,
+        slack_admin_channel_id,
+        channels_by_id,
+        channel_id_to_character,
+        console,
+    )
+
+    send_curtain_up(socket_mode_client, production, slack_admin_channel_id)
+
+    console.print("Starting the show. Listening for events...")
+    listen(
+        socket_mode_client,
+        slack_listener,
+        user_id,
+        console,
+    )
+
+    send_curtain_down(socket_mode_client, slack_admin_channel_id)
+
+    shutdown(
+        socket_mode_client,
+        slack_listener,
+        user_id,
+        production,
+        console,
+    )
```
proscenium/patterns/rag.py CHANGED

```diff
@@ -4,9 +4,10 @@ import logging
 from pymilvus import MilvusClient
 from pymilvus import model

+from lapidarist.verbs.display.milvus import chunk_hits_table
+from lapidarist.verbs.vector_database import closest_chunks
+
 from proscenium.verbs.complete import complete_simple
-from proscenium.verbs.display.milvus import chunk_hits_table
-from proscenium.verbs.vector_database import closest_chunks

 log = logging.getLogger(__name__)

```
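The retrieval helpers that rag.py previously took from proscenium.verbs now come from the new lapidarist dependency (added in METADATA below). A sketch of the call shape, assuming lapidarist keeps the signatures of the deleted proscenium versions shown later in this diff; the database path, model id, and collection name are hypothetical:

```python
from pymilvus import MilvusClient, model
from rich.console import Console

from lapidarist.verbs.display.milvus import chunk_hits_table
from lapidarist.verbs.vector_database import closest_chunks

client = MilvusClient(uri="milvus_demo.db")  # hypothetical local Milvus Lite file
embedding_fn = model.dense.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2", device="cpu"  # hypothetical embedding model
)

hits = closest_chunks(client, embedding_fn, "What is Proscenium?", "chunks", k=4)
Console().print(chunk_hits_table(hits))
```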
proscenium/util/__init__.py CHANGED

```diff
@@ -12,18 +12,15 @@ def get_secret(key: str, default: str = None) -> str:

     try:
         value = userdata.get(key)
-        print(
-            f"Using {key} from colab userdata and setting corresponding os.environ value"
-        )
         os.environ[key] = value
+        log.info(
+            f"In colab. Read {key} from colab userdata and set os.environ value"
+        )
         return value
     except userdata.SecretNotFoundError:
-        print(f"Using default value for {key}")
         return default
     except ImportError:
         if key in os.environ:
-            print(f"Not in colab. Using {key} from environment")
             return os.environ.get(key, default)
         else:
-            print(f"Using default value for {key}")
             return default
```
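get_secret now logs instead of printing. Its `(key, default)` shape matches the `get_secret` parameter of `production_from_config`, so either accessor below works; the secret name is hypothetical:

```python
import os

from proscenium.util import get_secret

# Colab-aware lookup: userdata first, then os.environ, then the default.
token = get_secret("SLACK_BOT_TOKEN", None)  # hypothetical secret name

# Plain-environment lookup, as bot.py passes to production_from_config.
token = os.environ.get("SLACK_BOT_TOKEN", None)
```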
{proscenium-0.0.7.dist-info → proscenium-0.0.9.dist-info}/METADATA CHANGED

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: proscenium
-Version: 0.0.7
+Version: 0.0.9
 Summary: Frame AI Agents
 License: ASFv2
 Author: Adam Pingel
@@ -18,6 +18,7 @@ Requires-Dist: docstring_parser (>=0.16,<0.17)
 Requires-Dist: gofannon (>=0.25.13,<0.26.0)
 Requires-Dist: langchain-community (>=0.3.18,<0.4.0)
 Requires-Dist: langchain-huggingface (>=0.1.2,<0.2.0)
+Requires-Dist: lapidarist (>=0.0.1,<0.0.2)
 Requires-Dist: neo4j (>=5.28.1,<6.0.0)
 Requires-Dist: pydantic (>=2.10.6,<3.0.0)
 Requires-Dist: pymilvus (>=2.5.4,<3.0.0)
```
proscenium-0.0.9.dist-info/RECORD ADDED

```diff
@@ -0,0 +1,25 @@
+proscenium/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
+proscenium/admin/__init__.py,sha256=GqGrkf7NOm3inuo67xszkARXZYKDsDs2jHuZh1tF4n0,924
+proscenium/bin/__init__.py,sha256=ThVsDG6BmnZ86gYaZLGMDcNmR6fVwBLJG3k_JawfzOY,1160
+proscenium/bin/bot.py,sha256=kdZBe1SGM-S3-QSN-DM-_UnGwe6W5D32s-dTuczIPCU,1358
+proscenium/core/__init__.py,sha256=aSUqPMn2LjZ0C_l9Tx6yqqlfCzM7oSljZxHosJyjlLU,4335
+proscenium/interfaces/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
+proscenium/interfaces/slack.py,sha256=wjkdhqAHgISxdNG5Pwo-8_QIyQf76X7b-0WwZfNabI8,10600
+proscenium/patterns/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
+proscenium/patterns/graph_rag.py,sha256=1HH1xdlAA6ypvYdP4dWFm-KXrGPUmm0T4qIdAU8mgvE,1763
+proscenium/patterns/rag.py,sha256=loeNWtcDkGjo2UpEIt6B0uBxRKkUtlOHdeCKNjHcupg,1495
+proscenium/patterns/tools.py,sha256=f2CD6f7CYiSs0Tm1Ff1sOL5Ti6DqJ5HQvMI7NmIgqNs,1740
+proscenium/util/__init__.py,sha256=FC1hjA37VvmVpF9-OlYNp9TjArH6etr6KiAvF9t_6lI,679
+proscenium/verbs/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
+proscenium/verbs/complete.py,sha256=Y1T49OfAV7K8p0DMzE4aVqtkgVfjUqb6IeOazzdYGow,5071
+proscenium/verbs/display/__init__.py,sha256=GXuvaMld8tzfJGngHdwVT-YLnuRmW2G0pMdti9Vj53s,238
+proscenium/verbs/display/chat.py,sha256=2THBUdhG3cIIVZOnJ_AMYL4nWXKFG2cuSkX6wkm48yQ,1148
+proscenium/verbs/display/tools.py,sha256=eR5g-r7MGKFZY0qg-ndkW3p0mfbupV0UaAUFqJPfnNM,1491
+proscenium/verbs/display.py,sha256=hHFmktyJtjYLi4I1-8HUfmsuoMTIxc6JFfczASBsCbI,260
+proscenium/verbs/invoke.py,sha256=-Bk7Pp0EEwRTS0MJUlViZeUNo8wxnDKJj5q78KU4CdM,339
+proscenium/verbs/remember.py,sha256=Hh9BDRAYf7MGeMD4MzU73p6Q28KrSiLWPx4GjTW1amQ,296
+proscenium-0.0.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+proscenium-0.0.9.dist-info/METADATA,sha256=OEKKTo6yw_4rnR404Y7pdzuXhJw8yvNcQnzZUxN6juM,2530
+proscenium-0.0.9.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
+proscenium-0.0.9.dist-info/entry_points.txt,sha256=Q05DVkPq_SjgD8mFN6bG5ae2r_UbsqKCdy2kDAtHYGU,57
+proscenium-0.0.9.dist-info/RECORD,,
```
proscenium/patterns/chunk_space.py DELETED

```diff
@@ -1,51 +0,0 @@
-from typing import Optional
-import logging
-from rich.console import Console
-from pymilvus import model
-
-from proscenium.verbs.read import load_file
-from proscenium.verbs.chunk import documents_to_chunks_by_characters
-from proscenium.verbs.display.milvus import collection_panel
-from proscenium.verbs.vector_database import vector_db
-from proscenium.verbs.vector_database import create_collection
-from proscenium.verbs.vector_database import add_chunks_to_vector_db
-
-log = logging.getLogger(__name__)
-
-
-def load_chunks_from_files(
-    data_files: list[str],
-    milvus_uri: str,
-    embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-    collection_name: str,
-    console: Optional[Console] = None,
-) -> None:
-
-    vector_db_client = vector_db(milvus_uri)
-    log.info("Vector db stored at %s", milvus_uri)
-
-    for data_file in data_files:
-
-        log.info(
-            "Loading data file %s into vector db %s collection %s",
-            data_file,
-            milvus_uri,
-            collection_name,
-        )
-        create_collection(vector_db_client, embedding_fn, collection_name)
-
-        documents = load_file(data_file)
-        chunks = documents_to_chunks_by_characters(documents)
-        log.info("Data file %s has %s chunks", data_file, len(chunks))
-
-        info = add_chunks_to_vector_db(
-            vector_db_client,
-            embedding_fn,
-            chunks,
-            collection_name,
-        )
-        log.info("%s chunks inserted ", info["insert_count"])
-        if console is not None:
-            console.print(collection_panel(vector_db_client, collection_name))
-
-    vector_db_client.close()
```
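For reference, the 0.0.7-era call shape of the removed `load_chunks_from_files` (the parameters are all visible in the deleted signature above; the file, model, and collection names are hypothetical):

```python
from pymilvus import model

from proscenium.patterns.chunk_space import load_chunks_from_files  # 0.0.7 only

embedding_fn = model.dense.SentenceTransformerEmbeddingFunction(
    model_name="all-MiniLM-L6-v2", device="cpu"  # hypothetical model id
)
load_chunks_from_files(
    ["data/notes.txt"],   # hypothetical data file
    "file:///milvus.db",  # file: uri handled by proscenium.verbs.vector_database
    embedding_fn,
    "chunks",             # hypothetical collection name
)
```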
proscenium/patterns/document_enricher.py DELETED

```diff
@@ -1,84 +0,0 @@
-from typing import List
-from typing import Callable
-from typing import Optional
-
-import time
-import logging
-from pydantic import BaseModel
-
-from rich.panel import Panel
-from rich.console import Console
-from rich.progress import Progress
-
-from langchain_core.documents.base import Document
-
-from proscenium.verbs.chunk import documents_to_chunks_by_tokens
-from proscenium.verbs.extract import extract_to_pydantic_model
-
-log = logging.getLogger(__name__)
-
-
-def extract_from_document_chunks(
-    doc: Document,
-    doc_as_rich: Callable[[Document], Panel],
-    chunk_extraction_model_id: str,
-    chunk_extraction_template: str,
-    chunk_extract_clazz: type[BaseModel],
-    delay: float,
-    console: Optional[Console] = None,
-) -> List[BaseModel]:
-
-    if console is not None:
-        console.print(doc_as_rich(doc))
-        console.print()
-
-    extract_models = []
-
-    chunks = documents_to_chunks_by_tokens([doc], chunk_size=1000, chunk_overlap=0)
-    for i, chunk in enumerate(chunks):
-
-        ce = extract_to_pydantic_model(
-            chunk_extraction_model_id,
-            chunk_extraction_template,
-            chunk_extract_clazz,
-            chunk.page_content,
-        )
-
-        log.info("Extract model in chunk %s of %s", i + 1, len(chunks))
-        if console is not None:
-            console.print(Panel(str(ce)))
-
-        extract_models.append(ce)
-        time.sleep(delay)
-
-    return extract_models
-
-
-def enrich_documents(
-    retrieve_documents: Callable[[], List[Document]],
-    extract_from_doc_chunks: Callable[[Document], List[BaseModel]],
-    doc_enrichments: Callable[[Document, list[BaseModel]], BaseModel],
-    enrichments_jsonl_file: str,
-    console: Optional[Console] = None,
-) -> None:
-
-    docs = retrieve_documents()
-
-    with Progress() as progress:
-
-        task_enrich = progress.add_task(
-            "[green]Enriching documents...", total=len(docs)
-        )
-
-        with open(enrichments_jsonl_file, "wt") as f:
-
-            for doc in docs:
-
-                chunk_extract_models = extract_from_doc_chunks(doc)
-                enrichments = doc_enrichments(doc, chunk_extract_models)
-                enrichments_json = enrichments.model_dump_json()
-                f.write(enrichments_json + "\n")
-
-                progress.update(task_enrich, advance=1)
-
-    log.info("Wrote document enrichments to %s", enrichments_jsonl_file)
```
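A 0.0.7-era sketch of wiring the removed `enrich_documents`: the three callables supply the documents, the per-document chunk extractions, and the aggregated enrichment record. The pydantic models, template, and model id below are hypothetical:

```python
from typing import List

from langchain_core.documents.base import Document
from pydantic import BaseModel
from rich.panel import Panel

from proscenium.patterns.document_enricher import (  # 0.0.7 only
    enrich_documents,
    extract_from_document_chunks,
)
from proscenium.verbs.read import load_file  # 0.0.7 only


class PersonMention(BaseModel):  # hypothetical per-chunk extraction
    name: str


class DocEnrichments(BaseModel):  # hypothetical per-document aggregate
    source: str
    people: List[str]


template = "People named in this text:\n\n{text}"  # hypothetical

enrich_documents(
    retrieve_documents=lambda: load_file("data/notes.txt"),
    extract_from_doc_chunks=lambda doc: extract_from_document_chunks(
        doc,
        lambda d: Panel(d.page_content[:80]),
        "openai/gpt-4o-mini",  # hypothetical chunk-extraction model id
        template,
        PersonMention,
        delay=1.0,
    ),
    doc_enrichments=lambda doc, ms: DocEnrichments(
        source=doc.metadata.get("source", ""), people=[m.name for m in ms]
    ),
    enrichments_jsonl_file="enrichments.jsonl",
)
```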
proscenium/patterns/entity_resolver.py DELETED

```diff
@@ -1,95 +0,0 @@
-from typing import Optional
-import logging
-
-from rich.console import Console
-from langchain_core.documents.base import Document
-from neo4j import Driver
-
-from pymilvus import MilvusClient
-
-from proscenium.verbs.vector_database import vector_db
-from proscenium.verbs.vector_database import create_collection
-from proscenium.verbs.vector_database import closest_chunks
-from proscenium.verbs.vector_database import add_chunks_to_vector_db
-from proscenium.verbs.vector_database import embedding_function
-from proscenium.verbs.display.milvus import collection_panel
-
-log = logging.getLogger(__name__)
-
-
-class Resolver:
-
-    def __init__(
-        self,
-        cypher: str,
-        field_name: str,
-        collection_name: str,
-    ):
-        self.cypher = cypher
-        self.field_name = field_name
-        self.collection_name = collection_name
-
-
-def load_entity_resolver(
-    driver: Driver,
-    resolvers: list[Resolver],
-    embedding_model_id: str,
-    milvus_uri: str,
-    console: Optional[Console] = None,
-) -> None:
-
-    vector_db_client = vector_db(milvus_uri)
-    log.info("Vector db stored at %s", milvus_uri)
-
-    embedding_fn = embedding_function(embedding_model_id)
-    log.info("Embedding model %s", embedding_model_id)
-
-    for resolver in resolvers:
-
-        values = []
-        with driver.session() as session:
-            result = session.run(resolver.cypher)
-            new_values = [Document(record[resolver.field_name]) for record in result]
-            values.extend(new_values)
-
-        log.info("Loading entity resolver into vector db %s", resolver.collection_name)
-        create_collection(vector_db_client, embedding_fn, resolver.collection_name)
-
-        info = add_chunks_to_vector_db(
-            vector_db_client, embedding_fn, values, resolver.collection_name
-        )
-        log.info("%s chunks inserted ", info["insert_count"])
-
-        if console is not None:
-            console.print(collection_panel(vector_db_client, resolver.collection_name))
-
-    vector_db_client.close()
-
-
-def find_matching_objects(
-    vector_db_client: MilvusClient,
-    approximate: str,
-    resolver: Resolver,
-) -> Optional[str]:
-
-    log.info("Loading collection", resolver.collection_name)
-    vector_db_client.load_collection(resolver.collection_name)
-
-    log.info(
-        "Finding entity matches for", approximate, "using", resolver.collection_name
-    )
-
-    hits = closest_chunks(
-        vector_db_client,
-        resolver.embedding_fn,
-        approximate,
-        resolver.collection_name,
-        k=5,
-    )
-    # TODO apply distance threshold
-    for match in [head["entity"]["text"] for head in hits[:1]]:
-        log.info("Closest match:", match)
-        return match
-
-    log.info("No match found")
-    return None
```
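The removed resolver flow in 0.0.7 shape: run a Cypher query, embed the returned field values, and store them in a Milvus collection. The Cypher, credentials, and names below are hypothetical. (Note that `find_matching_objects` reads `resolver.embedding_fn`, which `Resolver.__init__` never sets; callers apparently had to attach it themselves.)

```python
from neo4j import GraphDatabase

from proscenium.patterns.entity_resolver import (  # 0.0.7 only
    Resolver,
    load_entity_resolver,
)

resolver = Resolver(
    "MATCH (p:Person) RETURN p.name AS name",  # hypothetical Cypher
    "name",
    "person_names",  # hypothetical collection
)

driver = GraphDatabase.driver(
    "bolt://localhost:7687", auth=("neo4j", "password")  # hypothetical credentials
)
load_entity_resolver(driver, [resolver], "all-MiniLM-L6-v2", "file:///milvus.db")
```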
proscenium/patterns/knowledge_graph.py DELETED

```diff
@@ -1,41 +0,0 @@
-from typing import Callable
-from typing import Any
-
-import logging
-import json
-from pydantic import BaseModel
-
-from rich.progress import Progress
-
-from neo4j import Driver
-
-log = logging.getLogger(__name__)
-
-
-def load_knowledge_graph(
-    driver: Driver,
-    enrichments_jsonl_file: str,
-    enrichments_clazz: type[BaseModel],
-    doc_enrichments_to_graph: Callable[[Any, BaseModel], None],
-) -> None:
-
-    log.info("Parsing enrichments from %s", enrichments_jsonl_file)
-
-    enrichmentss = []
-    with open(enrichments_jsonl_file, "r") as f:
-        for line in f:
-            e = enrichments_clazz.model_construct(**json.loads(line))
-            enrichmentss.append(e)
-
-    with Progress() as progress:
-
-        task_load = progress.add_task(
-            f"Loading {len(enrichmentss)} enriched documents into graph...",
-            total=len(enrichmentss),
-        )
-
-        with driver.session() as session:
-            session.run("MATCH (n) DETACH DELETE n")  # empty graph
-            for e in enrichmentss:
-                session.execute_write(doc_enrichments_to_graph, e)
-                progress.update(task_load, advance=1)
```
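A 0.0.7-era sketch of the removed `load_knowledge_graph`: the write callback receives a neo4j transaction and one enrichment record (reusing the hypothetical `DocEnrichments` model sketched after document_enricher.py above); the Cypher is hypothetical:

```python
from neo4j import GraphDatabase

from proscenium.patterns.knowledge_graph import load_knowledge_graph  # 0.0.7 only


def doc_enrichments_to_graph(tx, e) -> None:
    # Hypothetical Cypher: one node per person, linked to the source document.
    for person in e.people:
        tx.run(
            "MERGE (p:Person {name: $name}) "
            "MERGE (d:Document {source: $source}) "
            "MERGE (p)-[:MENTIONED_IN]->(d)",
            name=person,
            source=e.source,
        )


driver = GraphDatabase.driver(
    "bolt://localhost:7687", auth=("neo4j", "password")  # hypothetical credentials
)
load_knowledge_graph(
    driver, "enrichments.jsonl", DocEnrichments, doc_enrichments_to_graph
)
```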
proscenium/verbs/chunk.py DELETED

```diff
@@ -1,42 +0,0 @@
-import logging
-import os
-from typing import List
-from typing import Iterable
-
-from langchain_core.documents.base import Document
-
-from langchain.text_splitter import CharacterTextSplitter
-from langchain.text_splitter import TokenTextSplitter
-
-log = logging.getLogger(__name__)
-
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-logging.getLogger("langchain_text_splitters.base").setLevel(logging.ERROR)
-
-# Each text chunk inherits the metadata from the document.
-
-
-def documents_to_chunks_by_characters(
-    documents: Iterable[Document], chunk_size: int = 1000, chunk_overlap: int = 0
-) -> List[Document]:
-
-    text_splitter = CharacterTextSplitter(
-        chunk_size=chunk_size, chunk_overlap=chunk_overlap
-    )
-
-    chunks = text_splitter.split_documents(documents)
-
-    return chunks
-
-
-def documents_to_chunks_by_tokens(
-    documents: Iterable[Document], chunk_size: int = 1000, chunk_overlap: int = 0
-) -> List[Document]:
-
-    text_splitter = TokenTextSplitter(
-        chunk_size=chunk_size, chunk_overlap=chunk_overlap
-    )
-
-    chunks = text_splitter.split_documents(documents)
-
-    return chunks
```
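Both removed helpers wrap LangChain splitters and keep each document's metadata on its chunks; a quick 0.0.7-era usage sketch:

```python
from langchain_core.documents.base import Document

from proscenium.verbs.chunk import documents_to_chunks_by_tokens  # 0.0.7 only

doc = Document(page_content="lorem ipsum " * 1000, metadata={"source": "demo"})
chunks = documents_to_chunks_by_tokens([doc], chunk_size=1000, chunk_overlap=0)
print(len(chunks), chunks[0].metadata)  # metadata carried over from the document
```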
proscenium/verbs/display/milvus.py DELETED

```diff
@@ -1,68 +0,0 @@
-from rich.table import Table
-from rich.panel import Panel
-from rich.text import Text
-from rich.console import Group
-from pymilvus import MilvusClient
-
-
-def chunk_hits_table(chunks: list[dict]) -> Table:
-
-    table = Table(title="Closest Chunks", show_lines=True)
-    table.add_column("id", justify="right")
-    table.add_column("distance")
-    table.add_column("entity.text", justify="right")
-    for chunk in chunks:
-        table.add_row(str(chunk["id"]), str(chunk["distance"]), chunk["entity"]["text"])
-    return table
-
-
-def collection_panel(client: MilvusClient, collection_name: str) -> Panel:
-
-    stats = client.get_collection_stats(collection_name)
-    desc = client.describe_collection(collection_name)
-
-    params_text = Text(
-        f"""
-Collection Name: {desc['collection_name']}
-Auto ID: {desc['auto_id']}
-Num Shards: {desc['num_shards']}
-Description: {desc['description']}
-Functions: {desc['functions']}
-Aliases: {desc['aliases']}
-Collection ID: {desc['collection_id']}
-Consistency Level: {desc['consistency_level']}
-Properties: {desc['properties']}
-Num Partitions: {desc['num_partitions']}
-Enable Dynamic Field: {desc['enable_dynamic_field']}"""
-    )
-
-    params_panel = Panel(params_text, title="Params")
-
-    fields_table = Table(title="Fields", show_lines=True)
-    fields_table.add_column("id", justify="left")
-    fields_table.add_column("name", justify="left")
-    fields_table.add_column("description", justify="left")
-    fields_table.add_column("type", justify="left")
-    fields_table.add_column("params", justify="left")
-    fields_table.add_column("auto_id", justify="left")
-    fields_table.add_column("is_primary", justify="left")
-    for field in desc["fields"]:
-        fields_table.add_row(
-            str(field["field_id"]),  # int
-            field["name"],
-            field["description"],
-            field["type"].name,  # Milvus DataType
-            "\n".join([f"{k}: {v}" for k, v in field["params"].items()]),
-            str(field.get("auto_id", "-")),  # bool
-            str(field.get("is_primary", "-")),
-        )  # bool
-
-    stats_text = Text("\n".join([f"{k}: {v}" for k, v in stats.items()]))
-    stats_panel = Panel(stats_text, title="Stats")
-
-    panel = Panel(
-        Group(params_panel, fields_table, stats_panel),
-        title=f"Collection {collection_name}",
-    )
-
-    return panel
```
proscenium/verbs/display/neo4j.py DELETED

```diff
@@ -1,25 +0,0 @@
-from typing import List
-from rich.table import Table
-
-
-def triples_table(triples: List[tuple[str, str, str]], title: str) -> Table:
-
-    table = Table(title=title, show_lines=False)
-    table.add_column("Subject", justify="left")
-    table.add_column("Predicate", justify="left")
-    table.add_column("Object", justify="left")
-    for triple in triples:
-        table.add_row(*triple)
-
-    return table
-
-
-def pairs_table(subject_predicate_pairs: List[tuple[str, str]], title: str) -> Table:
-
-    table = Table(title=title, show_lines=False)
-    table.add_column("Subject", justify="left")
-    table.add_column("Predicate", justify="left")
-    for pair in subject_predicate_pairs:
-        table.add_row(*pair)
-
-    return table
```
proscenium/verbs/extract.py DELETED

```diff
@@ -1,65 +0,0 @@
-from typing import Optional
-import logging
-from rich.console import Console
-from string import Formatter
-
-import json
-from pydantic import BaseModel
-
-from proscenium.verbs.complete import complete_simple
-
-log = logging.getLogger(__name__)
-
-extraction_system_prompt = "You are an entity extractor"
-
-
-class PartialFormatter(Formatter):
-    def get_value(self, key, args, kwargs):
-        try:
-            return super().get_value(key, args, kwargs)
-        except KeyError:
-            return "{" + key + "}"
-
-
-partial_formatter = PartialFormatter()
-
-raw_extraction_template = """\
-Below is a description of a data class for storing information extracted from text:
-
-{extraction_description}
-
-Find the information in the following text, and provide them in the specified JSON response format.
-Only answer in JSON.:
-
-{text}
-"""
-
-
-def extract_to_pydantic_model(
-    extraction_model_id: str,
-    extraction_template: str,
-    clazz: type[BaseModel],
-    text: str,
-    console: Optional[Console] = None,
-) -> BaseModel:
-
-    extract_str = complete_simple(
-        extraction_model_id,
-        extraction_system_prompt,
-        extraction_template.format(text=text),
-        response_format={
-            "type": "json_object",
-            "schema": clazz.model_json_schema(),
-        },
-        console=console,
-    )
-
-    log.info("complete_to_pydantic_model: extract_str = <<<%s>>>", extract_str)
-
-    try:
-        extract_dict = json.loads(extract_str)
-        return clazz.model_construct(**extract_dict)
-    except Exception as e:
-        log.error("complete_to_pydantic_model: Exception: %s", e)
-
-    return None
```
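The removed `PartialFormatter` exists so callers can bind `{extraction_description}` in `raw_extraction_template` early while leaving `{text}` for `extract_to_pydantic_model` to fill; the model id and pydantic class below are hypothetical:

```python
from pydantic import BaseModel

from proscenium.verbs.extract import (  # 0.0.7 only
    extract_to_pydantic_model,
    partial_formatter,
    raw_extraction_template,
)


class PersonMention(BaseModel):  # hypothetical
    name: str


template = partial_formatter.format(
    raw_extraction_template,
    extraction_description="A person's name, stored in a `name` field.",
)
# The literal "{text}" placeholder survives the partial format above.
result = extract_to_pydantic_model(
    "openai/gpt-4o-mini",  # hypothetical extraction model id
    template,
    PersonMention,
    "Ada Lovelace corresponded with Charles Babbage.",
)
```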
proscenium/verbs/read.py DELETED

```diff
@@ -1,53 +0,0 @@
-from typing import List
-
-import os
-import logging
-
-import httpx
-from pydantic.networks import HttpUrl
-from pathlib import Path
-
-from langchain_core.documents.base import Document
-from langchain_community.document_loaders import TextLoader
-from langchain_community.document_loaders.hugging_face_dataset import (
-    HuggingFaceDatasetLoader,
-)
-
-log = logging.getLogger(__name__)
-
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-logging.getLogger("langchain_text_splitters.base").setLevel(logging.ERROR)
-
-
-def load_file(filename: str) -> List[Document]:
-
-    loader = TextLoader(filename)
-    documents = loader.load()
-
-    return documents
-
-
-def load_hugging_face_dataset(
-    dataset_name: str, page_content_column: str = "text"
-) -> List[Document]:
-
-    loader = HuggingFaceDatasetLoader(
-        dataset_name, page_content_column=page_content_column
-    )
-    documents = loader.load()
-
-    return documents
-
-
-async def url_to_file(url: HttpUrl, data_file: Path, overwrite: bool = False):
-
-    if data_file.exists() and not overwrite:
-        return
-
-    async with httpx.AsyncClient() as client:
-
-        response = await client.get(url)
-        response.raise_for_status()
-
-        with open(data_file, "wb") as file:
-            file.write(response.content)
```
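Of the removed readers, only `url_to_file` is async; a 0.0.7-era sketch (URL and paths hypothetical):

```python
import asyncio
from pathlib import Path

from proscenium.verbs.read import load_file, url_to_file  # 0.0.7 only

asyncio.run(url_to_file("https://example.com/notes.txt", Path("data/notes.txt")))
documents = load_file("data/notes.txt")
```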
proscenium/verbs/vector_database.py DELETED

```diff
@@ -1,139 +0,0 @@
-from typing import Dict, List
-
-import logging
-from pathlib import Path
-from langchain_core.documents.base import Document
-from urllib.parse import urlsplit
-from pymilvus import MilvusClient
-from pymilvus import DataType, FieldSchema, CollectionSchema
-from pymilvus import model
-
-# See https://milvus.io/docs/quickstart.md
-
-log = logging.getLogger(__name__)
-
-
-def embedding_function(
-    embedding_model_id: str,
-) -> model.dense.SentenceTransformerEmbeddingFunction:
-    embedding_fn = model.dense.SentenceTransformerEmbeddingFunction(
-        model_name=embedding_model_id, device="cpu"  # or 'cuda:0'
-    )
-    return embedding_fn
-
-
-def schema_chunks(
-    embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-) -> CollectionSchema:
-
-    field_id = FieldSchema(
-        name="id", dtype=DataType.INT64, is_primary=True, auto_id=True
-    )
-    field_text = FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=50000)
-    field_vector = FieldSchema(
-        name="vector", dtype=DataType.FLOAT_VECTOR, dim=embedding_fn.dim
-    )
-
-    schema = CollectionSchema(
-        fields=[field_id, field_text, field_vector],
-        description="Chunks Schema",
-        enable_dynamic_field=True,
-    )
-
-    return schema
-
-
-def vector_db(
-    uri: str,
-) -> MilvusClient:
-
-    log.info("Connecting to vector db %s", uri)
-    uri_fields = urlsplit(uri)
-    client = None
-    if uri_fields[0] == "file":
-        file_path = Path(uri_fields[2][1:])
-        if file_path.exists():
-            log.info(
-                "Using existing %s file.",
-                uri_fields[2],
-            )
-        else:
-            log.info("Creating new vector db file %s", file_path)
-
-        client = MilvusClient(uri=str(file_path))
-
-    else:
-
-        log.info("Connecting to vector db at non-file uri %s", uri)
-        client = MilvusClient(uri=uri)
-
-    return client
-
-
-def create_collection(
-    client: MilvusClient,
-    embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-    collection_name: str,
-) -> None:
-
-    client.create_collection(
-        collection_name=collection_name,
-        schema=schema_chunks(embedding_fn),
-    )
-
-    index_params = client.prepare_index_params()
-
-    index_params.add_index(
-        field_name="vector",
-        index_type="IVF_FLAT",
-        metric_type="IP",
-        params={"nlist": 1024},
-    )
-
-    client.create_index(
-        collection_name=collection_name, index_params=index_params, sync=True
-    )
-    log.info("Created collection %s", collection_name)
-
-
-def add_chunks_to_vector_db(
-    client: MilvusClient,
-    embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-    chunks: List[Document],
-    collection_name: str,
-) -> Dict:
-
-    vectors = embedding_fn.encode_documents([chunk.page_content for chunk in chunks])
-
-    data = [
-        {"text": chunk.page_content, "vector": vector}
-        for chunk, vector in zip(chunks, vectors)
-    ]
-
-    insert_result = client.insert(collection_name, data)
-
-    return insert_result
-
-
-def closest_chunks(
-    client: MilvusClient,
-    embedding_fn: model.dense.SentenceTransformerEmbeddingFunction,
-    query: str,
-    collection_name: str,
-    k: int = 4,
-) -> List[Dict]:
-
-    client.load_collection(collection_name)
-
-    result = client.search(
-        collection_name=collection_name,
-        data=embedding_fn.encode_queries([query]),
-        anns_field="vector",
-        search_params={"metric": "IP", "offset": 0},
-        output_fields=["text"],
-        limit=k,
-    )
-
-    hits = result[0]
-
-    return hits
```
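The five removed verbs compose end to end; a 0.0.7-era sketch (uri, model, and collection names hypothetical):

```python
from langchain_core.documents.base import Document

from proscenium.verbs.vector_database import (  # 0.0.7 only
    add_chunks_to_vector_db,
    closest_chunks,
    create_collection,
    embedding_function,
    vector_db,
)

client = vector_db("file:///milvus.db")
embedding_fn = embedding_function("all-MiniLM-L6-v2")
create_collection(client, embedding_fn, "chunks")
add_chunks_to_vector_db(
    client, embedding_fn, [Document(page_content="hello world")], "chunks"
)
hits = closest_chunks(client, embedding_fn, "a greeting", "chunks", k=1)
client.close()
```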
proscenium/verbs/write.py DELETED

```diff
@@ -1,14 +0,0 @@
-from typing import List
-
-import logging
-import csv
-
-log = logging.getLogger(__name__)
-
-
-def triples_to_csv(triples: List[tuple[str, str, str]], filename: str) -> None:
-
-    with open(filename, "wt") as f:
-        writer = csv.writer(f, delimiter=",", quotechar='"')
-        writer.writerow(["entity", "role", "case name"])  # header
-        writer.writerows(triples)
```
proscenium-0.0.7.dist-info/RECORD DELETED

```diff
@@ -1,35 +0,0 @@
-proscenium/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
-proscenium/admin/__init__.py,sha256=GqGrkf7NOm3inuo67xszkARXZYKDsDs2jHuZh1tF4n0,924
-proscenium/bin/bot.py,sha256=nK4WN8ggpCr_KDpDI16Ib8RljmMpJyZ8qZTW8j7lwP4,3700
-proscenium/core/__init__.py,sha256=aSUqPMn2LjZ0C_l9Tx6yqqlfCzM7oSljZxHosJyjlLU,4335
-proscenium/interfaces/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
-proscenium/interfaces/slack.py,sha256=Arv3JT91gTZ_ZzizsOa9u1Wbbs8CnORr-KUzJzluy3g,8591
-proscenium/patterns/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
-proscenium/patterns/chunk_space.py,sha256=kQzGUtkQKGQGzGjBqS7jz_zr3uvJeiCBD2leflOenM8,1635
-proscenium/patterns/document_enricher.py,sha256=u-Q4FwvNAFj9nday235jUwxIwizht_VAW8DsmNQmoJs,2374
-proscenium/patterns/entity_resolver.py,sha256=SOK9WO6WFuhSWPFBy4Hj09E2qgqrzxE0MT65nAgZDUw,2748
-proscenium/patterns/graph_rag.py,sha256=1HH1xdlAA6ypvYdP4dWFm-KXrGPUmm0T4qIdAU8mgvE,1763
-proscenium/patterns/knowledge_graph.py,sha256=VLjG8Rp7YfJLZKe9bZt2d4NsGloBV1AYI6SuaQtRLhs,1137
-proscenium/patterns/rag.py,sha256=zvl_P48F3PDfVMgRXeiClLlevMsPKCMA1teVq9X20OE,1494
-proscenium/patterns/tools.py,sha256=f2CD6f7CYiSs0Tm1Ff1sOL5Ti6DqJ5HQvMI7NmIgqNs,1740
-proscenium/util/__init__.py,sha256=M1Fs2KD4JJJsJAStp8gdWD1DZn2N9IqFjhKYgHATKlM,854
-proscenium/verbs/__init__.py,sha256=nDWNd6_TSf4vDQuHVBoAf4QfZCB3ZUFQ0M7XvifNJ-g,78
-proscenium/verbs/chunk.py,sha256=hlVHfuR7sEAR8euh3FRd8hb2eJozE7bHe-E0RmAoFP8,1106
-proscenium/verbs/complete.py,sha256=Y1T49OfAV7K8p0DMzE4aVqtkgVfjUqb6IeOazzdYGow,5071
-proscenium/verbs/display/__init__.py,sha256=GXuvaMld8tzfJGngHdwVT-YLnuRmW2G0pMdti9Vj53s,238
-proscenium/verbs/display/chat.py,sha256=2THBUdhG3cIIVZOnJ_AMYL4nWXKFG2cuSkX6wkm48yQ,1148
-proscenium/verbs/display/milvus.py,sha256=GZze02_ZllukS7zVg0sCPxtjQ4z7O62nFY0Be4d5BX0,2390
-proscenium/verbs/display/neo4j.py,sha256=yT3hulI1U7s4VmHh_UHHHxN25Q_rRgu6KM2so_pTFUI,727
-proscenium/verbs/display/tools.py,sha256=eR5g-r7MGKFZY0qg-ndkW3p0mfbupV0UaAUFqJPfnNM,1491
-proscenium/verbs/display.py,sha256=hHFmktyJtjYLi4I1-8HUfmsuoMTIxc6JFfczASBsCbI,260
-proscenium/verbs/extract.py,sha256=2JSAblCww5Q2wiFXMib-D1iHKm6a5yj7AUYoyGoLQB8,1615
-proscenium/verbs/invoke.py,sha256=-Bk7Pp0EEwRTS0MJUlViZeUNo8wxnDKJj5q78KU4CdM,339
-proscenium/verbs/read.py,sha256=twFtcuyP-y-UwksLmGMCOjMqI7mp--VgvkGDfga6IxA,1262
-proscenium/verbs/remember.py,sha256=Hh9BDRAYf7MGeMD4MzU73p6Q28KrSiLWPx4GjTW1amQ,296
-proscenium/verbs/vector_database.py,sha256=U09P7jnpzUDeP7pEgJubf8xQsxC-O8Qb0MS0KY8eoe8,3527
-proscenium/verbs/write.py,sha256=0GUJuixLnuu_EbFFzAIgrhLEQnOrL0TdUlMiqOl9KtA,367
-proscenium-0.0.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-proscenium-0.0.7.dist-info/METADATA,sha256=w5WsPpSRx9yYSYyQPdVs0ZhPr01zZjUXUUv83vaEBig,2487
-proscenium-0.0.7.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
-proscenium-0.0.7.dist-info/entry_points.txt,sha256=Q05DVkPq_SjgD8mFN6bG5ae2r_UbsqKCdy2kDAtHYGU,57
-proscenium-0.0.7.dist-info/RECORD,,
```
Files without changes:

- {proscenium-0.0.7.dist-info → proscenium-0.0.9.dist-info}/LICENSE
- {proscenium-0.0.7.dist-info → proscenium-0.0.9.dist-info}/WHEEL
- {proscenium-0.0.7.dist-info → proscenium-0.0.9.dist-info}/entry_points.txt