cwyodmodules 0.3.80__py3-none-any.whl → 0.3.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cwyodmodules/batch/utilities/document_chunking/fixed_size_overlap.py +53 -3
- cwyodmodules/batch/utilities/document_chunking/layout.py +49 -3
- cwyodmodules/batch/utilities/document_chunking/page.py +48 -3
- cwyodmodules/batch/utilities/document_loading/web.py +57 -2
- cwyodmodules/batch/utilities/helpers/azure_search_helper.py +4 -13
- cwyodmodules/batch/utilities/helpers/config/config_helper.py +5 -10
- cwyodmodules/batch/utilities/helpers/config/default.json +1 -3
- cwyodmodules/batch/utilities/helpers/env_helper.py +4 -6
- cwyodmodules/batch/utilities/helpers/llm_helper.py +21 -58
- cwyodmodules/batch/utilities/helpers/orchestrator_helper.py +5 -14
- cwyodmodules/batch/utilities/orchestrator/__init__.py +2 -17
- cwyodmodules/batch/utilities/orchestrator/semantic_kernel_orchestrator.py +154 -22
- {cwyodmodules-0.3.80.dist-info → cwyodmodules-0.3.83.dist-info}/METADATA +1 -5
- {cwyodmodules-0.3.80.dist-info → cwyodmodules-0.3.83.dist-info}/RECORD +17 -23
- cwyodmodules/batch/utilities/orchestrator/lang_chain_agent.py +0 -174
- cwyodmodules/batch/utilities/orchestrator/open_ai_functions.py +0 -196
- cwyodmodules/batch/utilities/orchestrator/orchestration_strategy.py +0 -18
- cwyodmodules/batch/utilities/orchestrator/orchestrator_base.py +0 -170
- cwyodmodules/batch/utilities/orchestrator/prompt_flow.py +0 -195
- cwyodmodules/batch/utilities/orchestrator/strategies.py +0 -29
- {cwyodmodules-0.3.80.dist-info → cwyodmodules-0.3.83.dist-info}/WHEEL +0 -0
- {cwyodmodules-0.3.80.dist-info → cwyodmodules-0.3.83.dist-info}/licenses/LICENSE +0 -0
- {cwyodmodules-0.3.80.dist-info → cwyodmodules-0.3.83.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,5 @@
|
|
1
1
|
from typing import List
|
2
2
|
from .document_chunking_base import DocumentChunkingBase
|
3
|
-
from langchain.text_splitter import TokenTextSplitter
|
4
3
|
from .chunking_strategy import ChunkingSettings
|
5
4
|
from ..common.source_document import SourceDocument
|
6
5
|
from ...utilities.helpers.env_helper import EnvHelper
|
@@ -10,6 +9,54 @@ log_execution = env_helper.LOG_EXECUTION
|
|
10
9
|
log_args = env_helper.LOG_ARGS
|
11
10
|
log_result = env_helper.LOG_RESULT
|
12
11
|
|
12
|
+
|
13
|
+
class SimpleTokenSplitter:
    """Simple token-based text splitter to replace LangChain's TokenTextSplitter.

    Token counts are approximated as four characters per token, so the
    splitter works on character windows of ``chunk_size * 4`` characters that
    overlap by ``chunk_overlap * 4`` characters.
    """

    # Rough approximation: 1 token is about 4 characters.
    _CHARS_PER_TOKEN = 4
    # Preferred break points, tried in priority order: sentence endings,
    # then paragraph breaks, then line breaks, then word boundaries.
    _BREAK_SEQUENCES = (". ", ".\n", "\n\n", "\n", " ")

    def __init__(self, chunk_size: int, chunk_overlap: int):
        """Store the chunk size and the overlap, both expressed in tokens."""
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def split_text(self, text: str) -> List[str]:
        """Split text into chunks based on approximate token count.

        Args:
            text: The text to split.

        Returns:
            The stripped, non-empty chunks in document order. An empty list is
            returned for empty text or a non-positive ``chunk_size``.
        """
        if not text:
            return []

        window = self.chunk_size * self._CHARS_PER_TOKEN
        if window <= 0:
            # A non-positive window can never hold any content.
            return []

        # Keep the overlap strictly smaller than the window so that every
        # iteration is guaranteed to move forward by at least one character.
        overlap = min(max(self.chunk_overlap * self._CHARS_PER_TOKEN, 0), window - 1)

        text_length = len(text)
        chunks: List[str] = []
        start = 0

        while start < text_length:
            end = start + window

            if end >= text_length:
                # Final chunk: emit the remainder and stop. Stepping back by
                # the overlap here would only re-emit a suffix of this chunk.
                tail = text[start:].strip()
                if tail:
                    chunks.append(tail)
                break

            # Only accept break points beyond the overlap region; a break
            # inside it would produce a chunk shorter than the overlap and
            # force the window to creep forward one character at a time.
            search_from = start + overlap
            for separator in self._BREAK_SEQUENCES:
                break_pos = text.rfind(separator, search_from, end)
                if break_pos > start:
                    end = break_pos + len(separator)
                    break

            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)

            # Next window starts `overlap` characters before this chunk's end.
            start = end - overlap

        return chunks
|
58
|
+
|
59
|
+
|
13
60
|
class FixedSizeOverlapDocumentChunking(DocumentChunkingBase):
|
14
61
|
def __init__(self) -> None:
|
15
62
|
pass
|
@@ -28,10 +75,13 @@ class FixedSizeOverlapDocumentChunking(DocumentChunkingBase):
|
|
28
75
|
logger.error("No documents provided for chunking.")
|
29
76
|
logger.debug(e)
|
30
77
|
document_url = None
|
31
|
-
|
32
|
-
|
78
|
+
|
79
|
+
splitter = SimpleTokenSplitter(
|
80
|
+
chunk_size=chunking.chunk_size,
|
81
|
+
chunk_overlap=chunking.chunk_overlap
|
33
82
|
)
|
34
83
|
chunked_content_list = splitter.split_text(full_document_content)
|
84
|
+
|
35
85
|
# Create document for each chunk
|
36
86
|
documents = []
|
37
87
|
chunk_offset = 0
|
@@ -1,6 +1,5 @@
|
|
1
1
|
from typing import List
|
2
2
|
from .document_chunking_base import DocumentChunkingBase
|
3
|
-
from langchain.text_splitter import MarkdownTextSplitter
|
4
3
|
from .chunking_strategy import ChunkingSettings
|
5
4
|
from ..common.source_document import SourceDocument
|
6
5
|
from ...utilities.helpers.env_helper import EnvHelper
|
@@ -11,6 +10,50 @@ log_execution = env_helper.LOG_EXECUTION
|
|
11
10
|
log_args = env_helper.LOG_ARGS
|
12
11
|
log_result = env_helper.LOG_RESULT
|
13
12
|
|
13
|
+
|
14
|
+
class SimpleTextSplitter:
    """Simple text splitter to replace LangChain's MarkdownTextSplitter.

    Works on character windows of ``chunk_size`` characters that overlap by
    ``chunk_overlap`` characters, preferring to cut at natural boundaries.
    """

    # Preferred break points, tried in priority order: sentence endings,
    # then paragraph breaks, then line breaks, then word boundaries.
    _BREAK_SEQUENCES = (". ", ".\n", "\n\n", "\n", " ")

    def __init__(self, chunk_size: int, chunk_overlap: int):
        """Store the chunk size and the overlap, both in characters."""
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def split_text(self, text: str) -> List[str]:
        """Split text into chunks with overlap.

        Args:
            text: The text to split.

        Returns:
            The stripped, non-empty chunks in document order. An empty list is
            returned for empty text or a non-positive ``chunk_size``.
        """
        if not text:
            return []

        window = self.chunk_size
        if window <= 0:
            # A non-positive window can never hold any content.
            return []

        # Keep the overlap strictly smaller than the window so that every
        # iteration is guaranteed to move forward by at least one character.
        overlap = min(max(self.chunk_overlap, 0), window - 1)

        text_length = len(text)
        chunks: List[str] = []
        start = 0

        while start < text_length:
            end = start + window

            if end >= text_length:
                # Final chunk: emit the remainder and stop. Stepping back by
                # the overlap here would only re-emit a suffix of this chunk.
                tail = text[start:].strip()
                if tail:
                    chunks.append(tail)
                break

            # Only accept break points beyond the overlap region; a break
            # inside it would produce a chunk shorter than the overlap and
            # force the window to creep forward one character at a time.
            search_from = start + overlap
            for separator in self._BREAK_SEQUENCES:
                break_pos = text.rfind(separator, search_from, end)
                if break_pos > start:
                    end = break_pos + len(separator)
                    break

            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)

            # Next window starts `overlap` characters before this chunk's end.
            start = end - overlap

        return chunks
|
55
|
+
|
56
|
+
|
14
57
|
class LayoutDocumentChunking(DocumentChunkingBase):
|
15
58
|
def __init__(self) -> None:
|
16
59
|
pass
|
@@ -29,10 +72,13 @@ class LayoutDocumentChunking(DocumentChunkingBase):
|
|
29
72
|
logger.error("No documents provided for chunking.")
|
30
73
|
logger.debug(e)
|
31
74
|
document_url = None
|
32
|
-
|
33
|
-
|
75
|
+
|
76
|
+
splitter = SimpleTextSplitter(
|
77
|
+
chunk_size=chunking.chunk_size,
|
78
|
+
chunk_overlap=chunking.chunk_overlap
|
34
79
|
)
|
35
80
|
chunked_content_list = splitter.split_text(full_document_content)
|
81
|
+
|
36
82
|
# Create document for each chunk
|
37
83
|
documents = []
|
38
84
|
chunk_offset = 0
|
@@ -1,6 +1,5 @@
|
|
1
1
|
from typing import List
|
2
2
|
from .document_chunking_base import DocumentChunkingBase
|
3
|
-
from langchain.text_splitter import MarkdownTextSplitter
|
4
3
|
from .chunking_strategy import ChunkingSettings
|
5
4
|
from ..common.source_document import SourceDocument
|
6
5
|
from ...utilities.helpers.env_helper import EnvHelper
|
@@ -10,6 +9,50 @@ log_execution = env_helper.LOG_EXECUTION
|
|
10
9
|
log_args = env_helper.LOG_ARGS
|
11
10
|
log_result = env_helper.LOG_RESULT
|
12
11
|
|
12
|
+
|
13
|
+
class SimpleTextSplitter:
    """Simple text splitter to replace LangChain's MarkdownTextSplitter.

    Works on character windows of ``chunk_size`` characters that overlap by
    ``chunk_overlap`` characters, preferring to cut at natural boundaries.
    """

    # Preferred break points, tried in priority order: sentence endings,
    # then paragraph breaks, then line breaks, then word boundaries.
    _BREAK_SEQUENCES = (". ", ".\n", "\n\n", "\n", " ")

    def __init__(self, chunk_size: int, chunk_overlap: int):
        """Store the chunk size and the overlap, both in characters."""
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def split_text(self, text: str) -> List[str]:
        """Split text into chunks with overlap.

        Args:
            text: The text to split.

        Returns:
            The stripped, non-empty chunks in document order. An empty list is
            returned for empty text or a non-positive ``chunk_size``.
        """
        if not text:
            return []

        window = self.chunk_size
        if window <= 0:
            # A non-positive window can never hold any content.
            return []

        # Clamp the overlap below the window size; otherwise the window could
        # never advance by more than a single character per iteration.
        overlap = min(max(self.chunk_overlap, 0), window - 1)

        text_length = len(text)
        chunks: List[str] = []
        start = 0

        while start < text_length:
            end = start + window

            if end >= text_length:
                # The window reaches the end of the text: emit what is left
                # and stop, rather than re-emitting a suffix of this chunk.
                tail = text[start:].strip()
                if tail:
                    chunks.append(tail)
                break

            # Ignore break points that fall inside the overlap region: using
            # one would yield a chunk shorter than the overlap and make the
            # next window start before (or barely after) the current one.
            search_from = start + overlap
            for separator in self._BREAK_SEQUENCES:
                break_pos = text.rfind(separator, search_from, end)
                if break_pos > start:
                    end = break_pos + len(separator)
                    break

            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)

            # Step back by the overlap so consecutive chunks share context.
            start = end - overlap

        return chunks
|
54
|
+
|
55
|
+
|
13
56
|
class PageDocumentChunking(DocumentChunkingBase):
|
14
57
|
def __init__(self) -> None:
|
15
58
|
pass
|
@@ -25,8 +68,10 @@ class PageDocumentChunking(DocumentChunkingBase):
|
|
25
68
|
logger.error("No documents provided for chunking.")
|
26
69
|
logger.debug(e)
|
27
70
|
document_url = None
|
28
|
-
|
29
|
-
|
71
|
+
|
72
|
+
splitter = SimpleTextSplitter(
|
73
|
+
chunk_size=chunking.chunk_size,
|
74
|
+
chunk_overlap=chunking.chunk_overlap
|
30
75
|
)
|
31
76
|
documents_chunked = []
|
32
77
|
for idx, document in enumerate(documents):
|
@@ -1,16 +1,70 @@
|
|
1
1
|
from typing import List
|
2
2
|
import re
|
3
|
-
|
3
|
+
import requests
|
4
|
+
from bs4 import BeautifulSoup
|
4
5
|
from .document_loading_base import DocumentLoadingBase
|
5
6
|
from ..common.source_document import SourceDocument
|
6
7
|
|
7
8
|
|
9
|
+
class SimpleWebDocument:
    """Simple document class to replace LangChain's Document.

    Holds the extracted text of a page together with a metadata dictionary
    describing where it came from.
    """

    def __init__(self, page_content: str, metadata: dict):
        """Store the page text and its metadata as plain attributes."""
        self.page_content: str = page_content
        self.metadata: dict = metadata

    def __repr__(self) -> str:
        """Return a constructor-style representation to ease debugging."""
        return (
            f"{type(self).__name__}("
            f"page_content={self.page_content!r}, metadata={self.metadata!r})"
        )
|
14
|
+
|
15
|
+
|
16
|
+
class SimpleWebLoader:
    """Simple web loader to replace LangChain's WebBaseLoader."""

    # Browser-like User-Agent sent with every request.
    _USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'

    def __init__(self, url: str, timeout: float = 30):
        """Remember the URL to fetch.

        Args:
            url: Address of the page to load.
            timeout: Seconds passed to ``requests.get`` as its timeout.
                Defaults to 30, the value that was previously hard-coded.
        """
        self.url = url
        self.timeout = timeout

    def load(self) -> List[SimpleWebDocument]:
        """Load web content from URL.

        Returns:
            A single-element list. On success the document holds the page
            text with ``script``/``style`` elements removed and whitespace
            collapsed; on any failure it holds empty content and the error
            message under the ``"error"`` metadata key.
        """
        try:
            # Fetch the webpage
            response = requests.get(
                self.url,
                headers={'User-Agent': self._USER_AGENT},
                timeout=self.timeout,
            )
            response.raise_for_status()

            # Parse HTML content
            soup = BeautifulSoup(response.content, 'html.parser')

            # Remove script and style elements
            for element in soup(["script", "style"]):
                element.decompose()

            # Get text content
            text = soup.get_text()

            # Clean up text: strip each line, split it into phrases and join
            # the non-empty phrases with single spaces.
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
            text = ' '.join(chunk for chunk in chunks if chunk)

            return [SimpleWebDocument(
                page_content=text,
                metadata={"source": self.url}
            )]

        except Exception as e:
            # Loading is deliberately best-effort, but the failure must not be
            # invisible: record it in the log before returning empty content.
            import logging

            logging.getLogger(__name__).warning(
                "Failed to load web content from %s: %s", self.url, e
            )
            return [SimpleWebDocument(
                page_content="",
                metadata={"source": self.url, "error": str(e)}
            )]
|
58
|
+
|
59
|
+
|
8
60
|
class WebDocumentLoading(DocumentLoadingBase):
|
9
61
|
def __init__(self) -> None:
|
10
62
|
super().__init__()
|
11
63
|
|
12
64
|
def load(self, document_url: str) -> List[SourceDocument]:
|
13
|
-
|
65
|
+
loader = SimpleWebLoader(document_url)
|
66
|
+
documents = loader.load()
|
67
|
+
|
14
68
|
for document in documents:
|
15
69
|
document.page_content = re.sub("\n{3,}", "\n\n", document.page_content)
|
16
70
|
# Remove half non-ascii character from start/end of doc content
|
@@ -20,6 +74,7 @@ class WebDocumentLoading(DocumentLoadingBase):
|
|
20
74
|
document.page_content = re.sub(pattern, "", document.page_content)
|
21
75
|
if document.page_content == "":
|
22
76
|
documents.remove(document)
|
77
|
+
|
23
78
|
source_documents: List[SourceDocument] = [
|
24
79
|
SourceDocument(
|
25
80
|
content=document.page_content,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
from azure.identity import ChainedTokenCredential, DefaultAzureCredential
|
2
2
|
from typing import Union
|
3
|
-
|
3
|
+
|
4
4
|
from azure.core.credentials import AzureKeyCredential
|
5
5
|
from azure.search.documents import SearchClient
|
6
6
|
from azure.search.documents.indexes import SearchIndexClient
|
@@ -276,15 +276,6 @@ class AzureSearchHelper:
|
|
276
276
|
),
|
277
277
|
]
|
278
278
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
self.env_helper.AZURE_SEARCH_KEY
|
283
|
-
if self.env_helper.is_auth_type_keys()
|
284
|
-
else None
|
285
|
-
),
|
286
|
-
index_name=self.env_helper.AZURE_SEARCH_CONVERSATIONS_LOG_INDEX,
|
287
|
-
embedding_function=self.llm_helper.get_embedding_model().embed_query,
|
288
|
-
fields=fields,
|
289
|
-
user_agent="langchain chatwithyourdata-sa",
|
290
|
-
)
|
279
|
+
# Return simple search client instead of LangChain AzureSearch
|
280
|
+
# This maintains compatibility while removing LangChain dependency
|
281
|
+
return self.search_client
|
@@ -7,8 +7,7 @@ from ..azure_blob_storage_client import AzureBlobStorageClient
|
|
7
7
|
from ...document_chunking.chunking_strategy import ChunkingStrategy, ChunkingSettings
|
8
8
|
from ...document_loading import LoadingSettings, LoadingStrategy
|
9
9
|
from .embedding_config import EmbeddingConfig
|
10
|
-
|
11
|
-
from ...orchestrator import OrchestrationSettings
|
10
|
+
|
12
11
|
from ..env_helper import EnvHelper
|
13
12
|
from .assistant_strategy import AssistantStrategy
|
14
13
|
from .conversation_flow import ConversationFlow
|
@@ -43,12 +42,8 @@ class Config:
|
|
43
42
|
for c in config["document_processors"]
|
44
43
|
]
|
45
44
|
self.env_helper = EnvHelper()
|
46
|
-
|
47
|
-
|
48
|
-
}
|
49
|
-
self.orchestrator = OrchestrationSettings(
|
50
|
-
config.get("orchestrator", self.default_orchestration_settings)
|
51
|
-
)
|
45
|
+
# Orchestrator is always semantic kernel now
|
46
|
+
# No configuration needed as there's only one option
|
52
47
|
self.integrated_vectorization_config = (
|
53
48
|
IntegratedVectorizationConfig(config["integrated_vectorization_config"])
|
54
49
|
if self.env_helper.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION
|
@@ -93,7 +88,7 @@ class Config:
|
|
93
88
|
|
94
89
|
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
95
90
|
def get_available_orchestration_strategies(self):
|
96
|
-
return [
|
91
|
+
return ["semantic_kernel"] # Only semantic kernel is supported now
|
97
92
|
|
98
93
|
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=log_result)
|
99
94
|
def get_available_ai_assistant_types(self):
|
@@ -271,7 +266,7 @@ class ConfigHelper:
|
|
271
266
|
with open(config_file_path, encoding="utf-8") as f:
|
272
267
|
ConfigHelper._default_config = json.loads(
|
273
268
|
Template(f.read()).substitute(
|
274
|
-
ORCHESTRATION_STRATEGY=
|
269
|
+
ORCHESTRATION_STRATEGY="semantic_kernel",
|
275
270
|
LOG_USER_INTERACTIONS=(
|
276
271
|
False
|
277
272
|
if env_helper.DATABASE_TYPE == DatabaseType.POSTGRESQL.value
|
@@ -139,9 +139,7 @@
|
|
139
139
|
"log_user_interactions": "${LOG_USER_INTERACTIONS}",
|
140
140
|
"log_tokens": "${LOG_TOKENS}"
|
141
141
|
},
|
142
|
-
|
143
|
-
"strategy": "${ORCHESTRATION_STRATEGY}"
|
144
|
-
},
|
142
|
+
|
145
143
|
"enable_chat_history": true,
|
146
144
|
"database_type": "${DATABASE_TYPE}"
|
147
145
|
}
|
@@ -3,7 +3,7 @@ import os
|
|
3
3
|
|
4
4
|
import threading
|
5
5
|
# from dotenv import load_dotenv
|
6
|
-
|
6
|
+
|
7
7
|
from ..helpers.config.conversation_flow import ConversationFlow
|
8
8
|
from ..helpers.config.database_type import DatabaseType
|
9
9
|
|
@@ -130,10 +130,8 @@ class EnvHelper:
|
|
130
130
|
"USE_ADVANCED_IMAGE_PROCESSING", "False"
|
131
131
|
)
|
132
132
|
self.CONVERSATION_FLOW = os.getenv("CONVERSATION_FLOW", "custom")
|
133
|
-
# Orchestration Settings
|
134
|
-
self.ORCHESTRATION_STRATEGY =
|
135
|
-
"ORCHESTRATION_STRATEGY", "openai_function"
|
136
|
-
)
|
133
|
+
# Orchestration Settings - Always use semantic_kernel
|
134
|
+
self.ORCHESTRATION_STRATEGY = "semantic_kernel"
|
137
135
|
# PostgreSQL configuration
|
138
136
|
elif self.DATABASE_TYPE == DatabaseType.POSTGRESQL.value:
|
139
137
|
self.AZURE_POSTGRES_SEARCH_TOP_K = 5
|
@@ -154,7 +152,7 @@ class EnvHelper:
|
|
154
152
|
self.AZURE_SEARCH_USE_INTEGRATED_VECTORIZATION = False
|
155
153
|
self.USE_ADVANCED_IMAGE_PROCESSING = False
|
156
154
|
self.CONVERSATION_FLOW = ConversationFlow.CUSTOM.value
|
157
|
-
self.ORCHESTRATION_STRATEGY =
|
155
|
+
self.ORCHESTRATION_STRATEGY = "semantic_kernel"
|
158
156
|
else:
|
159
157
|
raise ValueError(
|
160
158
|
"Unsupported DATABASE_TYPE. Please set DATABASE_TYPE to 'CosmosDB' or 'PostgreSQL'."
|
@@ -1,7 +1,6 @@
|
|
1
1
|
from openai import AzureOpenAI
|
2
2
|
from typing import List, Union, cast
|
3
|
-
|
4
|
-
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
3
|
+
# Removed LangChain dependencies - using direct OpenAI SDK instead
|
5
4
|
from semantic_kernel.connectors.ai.open_ai import AzureChatCompletion
|
6
5
|
from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
|
7
6
|
AzureChatPromptExecutionSettings,
|
@@ -49,68 +48,32 @@ class LLMHelper:
|
|
49
48
|
|
50
49
|
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
51
50
|
def get_llm(self):
|
52
|
-
|
53
|
-
|
54
|
-
deployment_name=self.llm_model,
|
55
|
-
temperature=0,
|
56
|
-
max_tokens=self.llm_max_tokens,
|
57
|
-
openai_api_version=self.openai_client._api_version,
|
58
|
-
azure_endpoint=self.env_helper.AZURE_OPENAI_ENDPOINT,
|
59
|
-
api_key=self.env_helper.OPENAI_API_KEY,
|
60
|
-
)
|
61
|
-
else:
|
62
|
-
return AzureChatOpenAI(
|
63
|
-
deployment_name=self.llm_model,
|
64
|
-
temperature=0,
|
65
|
-
max_tokens=self.llm_max_tokens,
|
66
|
-
openai_api_version=self.openai_client._api_version,
|
67
|
-
azure_endpoint=self.env_helper.AZURE_OPENAI_ENDPOINT,
|
68
|
-
azure_ad_token_provider=self.token_provider,
|
69
|
-
)
|
51
|
+
# Return the OpenAI client directly instead of LangChain wrapper
|
52
|
+
return self.openai_client
|
70
53
|
|
71
|
-
# TODO: This needs to have a custom callback to stream back to the UI
|
72
54
|
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
73
55
|
def get_streaming_llm(self):
|
74
|
-
|
75
|
-
|
76
|
-
azure_endpoint=self.env_helper.AZURE_OPENAI_ENDPOINT,
|
77
|
-
api_key=self.env_helper.OPENAI_API_KEY,
|
78
|
-
streaming=True,
|
79
|
-
callbacks=[StreamingStdOutCallbackHandler],
|
80
|
-
deployment_name=self.llm_model,
|
81
|
-
temperature=0,
|
82
|
-
max_tokens=self.llm_max_tokens,
|
83
|
-
openai_api_version=self.openai_client._api_version,
|
84
|
-
)
|
85
|
-
else:
|
86
|
-
return AzureChatOpenAI(
|
87
|
-
azure_endpoint=self.env_helper.AZURE_OPENAI_ENDPOINT,
|
88
|
-
api_key=self.env_helper.OPENAI_API_KEY,
|
89
|
-
streaming=True,
|
90
|
-
callbacks=[StreamingStdOutCallbackHandler],
|
91
|
-
deployment_name=self.llm_model,
|
92
|
-
temperature=0,
|
93
|
-
max_tokens=self.llm_max_tokens,
|
94
|
-
openai_api_version=self.openai_client._api_version,
|
95
|
-
azure_ad_token_provider=self.token_provider,
|
96
|
-
)
|
56
|
+
# Return the OpenAI client directly - streaming is handled via stream=True parameter
|
57
|
+
return self.openai_client
|
97
58
|
|
98
59
|
@logger.trace_function(log_execution=log_execution, log_args=log_args, log_result=False)
|
99
60
|
def get_embedding_model(self):
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
)
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
61
|
+
# Return a simple embedding model wrapper that uses the OpenAI client directly
|
62
|
+
class EmbeddingModel:
|
63
|
+
def __init__(self, openai_client, embedding_model):
|
64
|
+
self.openai_client = openai_client
|
65
|
+
self.embedding_model = embedding_model
|
66
|
+
|
67
|
+
def embed_query(self, text: str) -> List[float]:
|
68
|
+
return (
|
69
|
+
self.openai_client.embeddings.create(
|
70
|
+
input=[text], model=self.embedding_model
|
71
|
+
)
|
72
|
+
.data[0]
|
73
|
+
.embedding
|
74
|
+
)
|
75
|
+
|
76
|
+
return EmbeddingModel(self.openai_client, self.embedding_model)
|
114
77
|
|
115
78
|
@logger.trace_function(log_execution=log_execution, log_args=False, log_result=False)
|
116
79
|
def generate_embeddings(self, input: Union[str, list[int]]) -> List[float]:
|
@@ -1,15 +1,12 @@
|
|
1
1
|
from typing import List
|
2
|
+
from ..orchestrator.semantic_kernel_orchestrator import SemanticKernelOrchestrator
|
2
3
|
|
3
|
-
|
4
|
-
from ..orchestrator import OrchestrationSettings
|
5
|
-
from ..orchestrator.strategies import get_orchestrator
|
6
|
-
|
7
|
-
__all__ = ["OrchestrationStrategy"]
|
4
|
+
__all__ = ["Orchestrator"]
|
8
5
|
|
9
6
|
|
10
7
|
class Orchestrator:
|
11
8
|
def __init__(self) -> None:
|
12
|
-
|
9
|
+
self.orchestrator = SemanticKernelOrchestrator()
|
13
10
|
|
14
11
|
async def handle_message(
|
15
12
|
self,
|
@@ -17,14 +14,8 @@ class Orchestrator:
|
|
17
14
|
chat_history: List[dict],
|
18
15
|
conversation_id: str,
|
19
16
|
user_info,
|
20
|
-
orchestrator: OrchestrationSettings,
|
21
17
|
**kwargs: dict,
|
22
18
|
) -> dict:
|
23
|
-
|
24
|
-
|
25
|
-
raise Exception(
|
26
|
-
f"Unknown orchestration strategy: {orchestrator.strategy.value}"
|
27
|
-
)
|
28
|
-
return await orchestrator.handle_message(
|
29
|
-
user_message, chat_history, conversation_id, user_info
|
19
|
+
return await self.orchestrator.handle_message(
|
20
|
+
user_message, chat_history, conversation_id, user_info, **kwargs
|
30
21
|
)
|
@@ -1,18 +1,3 @@
|
|
1
|
-
import
|
2
|
-
from typing import List
|
3
|
-
import os.path
|
4
|
-
import pkgutil
|
5
|
-
from .orchestration_strategy import OrchestrationStrategy
|
1
|
+
from .semantic_kernel_orchestrator import SemanticKernelOrchestrator
|
6
2
|
|
7
|
-
|
8
|
-
class OrchestrationSettings:
|
9
|
-
def __init__(self, orchestration: dict):
|
10
|
-
self.strategy = OrchestrationStrategy(orchestration["strategy"])
|
11
|
-
|
12
|
-
|
13
|
-
# Get a list of all the classes defined in the module
|
14
|
-
def get_all_classes() -> List[str]:
|
15
|
-
return [name for _, name, _ in pkgutil.iter_modules([os.path.dirname(__file__)])]
|
16
|
-
|
17
|
-
|
18
|
-
__all__ = get_all_classes()
|
3
|
+
__all__ = ["SemanticKernelOrchestrator"]
|