miscai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ .venv
2
+ dist
miscai-0.1.0/LICENSE ADDED
@@ -0,0 +1,25 @@
1
+ The MIT License (MIT)
2
+ =====================
3
+
4
+ Copyright © '2026' 'Daniel (pui4)'
5
+
6
+ Permission is hereby granted, free of charge, to any person
7
+ obtaining a copy of this software and associated documentation
8
+ files (the “Software”), to deal in the Software without
9
+ restriction, including without limitation the rights to use,
10
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
11
+ copies of the Software, and to permit persons to whom the
12
+ Software is furnished to do so, subject to the following
13
+ conditions:
14
+
15
+ The above copyright notice and this permission notice shall be
16
+ included in all copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
19
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
20
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
21
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
22
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
23
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25
+ OTHER DEALINGS IN THE SOFTWARE.
miscai-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.4
2
+ Name: miscai
3
+ Version: 0.1.0
4
+ Summary: Miscellaneous different AI tools for embedding into projects.
5
+ License: The MIT License (MIT)
6
+ =====================
7
+
8
+ Copyright © '2026' 'Daniel (pui4)'
9
+
10
+ Permission is hereby granted, free of charge, to any person
11
+ obtaining a copy of this software and associated documentation
12
+ files (the “Software”), to deal in the Software without
13
+ restriction, including without limitation the rights to use,
14
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the
16
+ Software is furnished to do so, subject to the following
17
+ conditions:
18
+
19
+ The above copyright notice and this permission notice shall be
20
+ included in all copies or substantial portions of the Software.
21
+
22
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
23
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
24
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
26
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
27
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
28
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
29
+ OTHER DEALINGS IN THE SOFTWARE.
30
+ License-File: LICENSE
31
+ Requires-Python: >=3.14
32
+ Provides-Extra: dlm
33
+ Requires-Dist: chromadb>=1.5.2; extra == 'dlm'
34
+ Requires-Dist: peft>=0.19.1; extra == 'dlm'
35
+ Requires-Dist: requests>=2.34.2; extra == 'dlm'
36
+ Requires-Dist: torch>=2.10.0; extra == 'dlm'
37
+ Requires-Dist: transformers>=4.57.3; extra == 'dlm'
38
+ Provides-Extra: llm
39
+ Requires-Dist: chromadb>=1.5.2; extra == 'llm'
40
+ Requires-Dist: ollama>=0.6.1; extra == 'llm'
41
+ Requires-Dist: peft>=0.19.1; extra == 'llm'
42
+ Requires-Dist: torch>=2.10.0; extra == 'llm'
43
+ Requires-Dist: transformers>=4.57.3; extra == 'llm'
44
+ Provides-Extra: stt
45
+ Requires-Dist: numpy>=2.4.6; extra == 'stt'
46
+ Requires-Dist: pywhispercpp>=1.4.1; extra == 'stt'
47
+ Provides-Extra: tts
48
+ Requires-Dist: numpy>=2.4.6; extra == 'tts'
49
+ Requires-Dist: pocket-tts>=1.1.1; extra == 'tts'
50
+ Requires-Dist: scipy>=1.17.1; extra == 'tts'
51
+ Provides-Extra: vad
52
+ Requires-Dist: numpy>=2.4.6; extra == 'vad'
53
+ Requires-Dist: torch>=2.10.0; extra == 'vad'
54
+ Requires-Dist: torchaudio>=2.10.0; extra == 'vad'
55
+ Provides-Extra: wake
56
+ Requires-Dist: local-wake>=0.1.1; extra == 'wake'
57
+ Description-Content-Type: text/markdown
58
+
59
+ # MiscAI
60
+ Tools to make it easier to embed AI-related things into your existing projects.
61
+
62
+ ## Features
63
+ - Ollama local LLM wrapper (with built-in Jina embeddings model)
64
+ - Mercury diffusion language model (with the same embeddings as with Ollama, requires a mercury API key)
65
+ - Easy tool function creation with both types of language models
66
+ - Whisper-cpp speech to text
67
+ - Text to speech cloning with pocket-tts (requires a hugging face account)
68
+ - Voice activity detection with silero
69
+ - Wakeword detection with local wake
70
+
71
+ ## Ollama local LLM
72
+ To use the local LLM, Ollama must already be installed on the host. The embeddings model runs on the CPU, as it is light and performant enough. Install it into your Python project with:
73
+ ```sh
74
+ pip install miscai[llm]
75
+ ```
76
+ Here is an example of how to use it:
77
+ ```python
78
+ from miscai.llm import LLM
79
+
80
+ llm = LLM(promt="You are helpful assistant.", model="qwen3:4b", convo_file="./convo.json")
81
+ print(llm.ask_LLM("Hello!"))
82
+ ```
83
+
84
+ ## Diffusion Language Model (DLM)
85
+ Using the DLM module is very similar to using the local LLM. You will need to create a Mercury API key to use the wrapper. The embeddings model runs on the CPU, as it is light and performant enough. Install it into your Python project with:
86
+ ```sh
87
+ pip install miscai[dlm]
88
+ ```
89
+ Here is an example of how to use it:
90
+ ```python
91
+ from miscai.dlm import DLM
92
+
93
+ dlm = DLM(promt="You are helpful assistant.", model="mercury-v2", convo_file="./convo.json", api_key="123abc")
94
+ print(dlm.ask_LLM("Hello!"))
95
+ ```
96
+
97
+ ## Tool calling for Language models
98
+ This requires that you have installed one of the language models above. Here is an example of how to use it in a project:
99
+ ```python
100
+ from miscai.tools import ToolLoader
101
+
102
+ tool_loader = ToolLoader("./tools")
103
+ ```
104
+ Then when you are creating your language model object, create it like this (using the Ollama one for example):
105
+ ```python
106
+ llm = LLM(promt="You are helpful assistant.", model="qwen3:4b", convo_file="./convo.json", tools=tool_loader.get_tools())
107
+ ```
108
+ To create a tool, install the required dependencies to your project and place the python file in the directory specified in the ToolLoader object. Here is an example tool that gets the time with the 'pytz' package:
109
+ ```python
110
+ import pytz
111
+ from datetime import datetime
112
+
113
+ tool = {
114
+ "type": "function",
115
+ "function": {
116
+ "name": "get_current_time",
117
+ "description": "Gets the current time for a provided time zone.",
118
+ 'parameters': {
119
+ "type": "object",
120
+ "properties": {
121
+ "timezone": {
122
+ "type": "string",
123
+ "description": "The time zone as specified in the tz (zoneinfo) library."
124
+ }
125
+ },
126
+ "required": ["timezone"]
127
+ }
128
+ }
129
+ }
130
+
131
+ def get_current_time(timezone: str) -> str:
132
+ tz = pytz.timezone(timezone)
133
+ return str(datetime.now(tz))
134
+ ```
135
+
136
+ ## Speech to text
137
+ The whisper-cpp model runs on the CPU; it is quite performant and accurate, but may use a lot of CPU. The audio is passed as a numpy array sampled at 16000 Hz (16 kHz) in 512-byte chunks. Install it into your project with:
138
+ ```sh
139
+ pip install miscai[stt]
140
+ ```
141
+ Here is an example of how to use it:
142
+ ```python
143
+ from miscai.stt import STT
144
+
145
+ stt = STT()
146
+ print(stt.transcribe(audio_bytes=audio_bytes))
147
+ ```
148
+
149
+ ## Text to speech
150
+ You will have to create a Hugging Face account and accept the EULA for using the pocket-tts model. Then create an API key, as you will need this later. The model runs on the CPU for the reasons mentioned on the model's page. The base audio should be sampled at 16000 Hz (16 kHz) for the best results. The output audio is a numpy array, returned together with its sample rate. Install it into your project using:
151
+ ```sh
152
+ pip install miscai[tts]
153
+ ```
154
+ Here is an example of how to use it:
155
+ ```python
156
+ from miscai.tts import TTS
157
+
158
+ tts = TTS("./voice_base.wav")
159
+ audio = tts.get_audio("Hello!")
160
+ ```
161
+
162
+ ## Voice activity detection
163
+ This uses Silero VAD for voice detection; it runs on the CPU because the model is lightweight enough. The input audio is a numpy array in 512-byte chunks. For best results the audio should be sampled at 16000 Hz (16 kHz). Install it into your project with:
164
+ ```sh
165
+ pip install miscai[vad]
166
+ ```
167
+ Here is an example of how to use it:
168
+ ```python
169
+ from miscai.vad import VAD
170
+
171
+ vad = VAD(threshold=0.5)
172
+ print(vad.is_speech(audio_bytes=audio_bytes))
173
+ ```
174
+
175
+ ## Wakeword detection
176
+ This uses local-wake, so any audio file of a person saying the wakeword can serve as the reference for it. The input is an audio stream with 512-byte chunks (best obtained through SoundDevice). For best results, both the input audio and the reference audio files should be sampled at 16000 Hz (16 kHz). Install it into your project with:
177
+ ```sh
178
+ pip install miscai[wake]
179
+ ```
180
+ Here is an example of how to use it:
181
+ ```python
182
+ from miscai.wakeword import WakeWord
183
+
184
+ wake_word = WakeWord(threshold=0.5, audio_dir="./wakeword")
185
+ print("Begining to wait for wakeword")
186
+ wake_word.waitForWord(callback=awoke, stream=stream)
187
+
188
+ def awoke(detection: dict, stream: sd.InputStream):
189
+ print(f"Wake word detected: {detection['wakeword']}")
190
+ ```
191
+
192
+ ## Final notes
193
+ This project is at quite an early stage and could do with some more polish. It is not meant to be used in production, but it is good for making small prototypes of ideas you may have. I made this to make it easier to integrate LLMs into my other projects, and it grew from there. Changes are welcome, as the documentation is hastily written and the code is arguably worse. Thanks for reading and maybe using this. :)
miscai-0.1.0/README.md ADDED
@@ -0,0 +1,135 @@
1
+ # MiscAI
2
+ Tools to make it easier to embed AI-related things into your existing projects.
3
+
4
+ ## Features
5
+ - Ollama local LLM wrapper (with built-in Jina embeddings model)
6
+ - Mercury diffusion language model (with the same embeddings as with Ollama, requires a mercury API key)
7
+ - Easy tool function creation with both types of language models
8
+ - Whisper-cpp speech to text
9
+ - Text to speech cloning with pocket-tts (requires a hugging face account)
10
+ - Voice activity detection with silero
11
+ - Wakeword detection with local wake
12
+
13
+ ## Ollama local LLM
14
+ To use the local LLM, Ollama must already be installed on the host. The embeddings model runs on the CPU, as it is light and performant enough. Install it into your Python project with:
15
+ ```sh
16
+ pip install miscai[llm]
17
+ ```
18
+ Here is an example of how to use it:
19
+ ```python
20
+ from miscai.llm import LLM
21
+
22
+ llm = LLM(promt="You are helpful assistant.", model="qwen3:4b", convo_file="./convo.json")
23
+ print(llm.ask_LLM("Hello!"))
24
+ ```
25
+
26
+ ## Diffusion Language Model (DLM)
27
+ Using the DLM module is very similar to using the local LLM. You will need to create a Mercury API key to use the wrapper. The embeddings model runs on the CPU, as it is light and performant enough. Install it into your Python project with:
28
+ ```sh
29
+ pip install miscai[dlm]
30
+ ```
31
+ Here is an example of how to use it:
32
+ ```python
33
+ from miscai.dlm import DLM
34
+
35
+ dlm = DLM(promt="You are helpful assistant.", model="mercury-v2", convo_file="./convo.json", api_key="123abc")
36
+ print(dlm.ask_LLM("Hello!"))
37
+ ```
38
+
39
+ ## Tool calling for Language models
40
+ This requires that you have installed one of the language models above. Here is an example of how to use it in a project:
41
+ ```python
42
+ from miscai.tools import ToolLoader
43
+
44
+ tool_loader = ToolLoader("./tools")
45
+ ```
46
+ Then when you are creating your language model object, create it like this (using the Ollama one for example):
47
+ ```python
48
+ llm = LLM(promt="You are helpful assistant.", model="qwen3:4b", convo_file="./convo.json", tools=tool_loader.get_tools())
49
+ ```
50
+ To create a tool, install the required dependencies to your project and place the python file in the directory specified in the ToolLoader object. Here is an example tool that gets the time with the 'pytz' package:
51
+ ```python
52
+ import pytz
53
+ from datetime import datetime
54
+
55
+ tool = {
56
+ "type": "function",
57
+ "function": {
58
+ "name": "get_current_time",
59
+ "description": "Gets the current time for a provided time zone.",
60
+ 'parameters': {
61
+ "type": "object",
62
+ "properties": {
63
+ "timezone": {
64
+ "type": "string",
65
+ "description": "The time zone as specified in the tz (zoneinfo) library."
66
+ }
67
+ },
68
+ "required": ["timezone"]
69
+ }
70
+ }
71
+ }
72
+
73
+ def get_current_time(timezone: str) -> str:
74
+ tz = pytz.timezone(timezone)
75
+ return str(datetime.now(tz))
76
+ ```
77
+
78
+ ## Speech to text
79
+ The whisper-cpp model runs on the CPU; it is quite performant and accurate, but may use a lot of CPU. The audio is passed as a numpy array sampled at 16000 Hz (16 kHz) in 512-byte chunks. Install it into your project with:
80
+ ```sh
81
+ pip install miscai[stt]
82
+ ```
83
+ Here is an example of how to use it:
84
+ ```python
85
+ from miscai.stt import STT
86
+
87
+ stt = STT()
88
+ print(stt.transcribe(audio_bytes=audio_bytes))
89
+ ```
90
+
91
+ ## Text to speech
92
+ You will have to create a Hugging Face account and accept the EULA for using the pocket-tts model. Then create an API key, as you will need this later. The model runs on the CPU for the reasons mentioned on the model's page. The base audio should be sampled at 16000 Hz (16 kHz) for the best results. The output audio is a numpy array, returned together with its sample rate. Install it into your project using:
93
+ ```sh
94
+ pip install miscai[tts]
95
+ ```
96
+ Here is an example of how to use it:
97
+ ```python
98
+ from miscai.tts import TTS
99
+
100
+ tts = TTS("./voice_base.wav")
101
+ audio = tts.get_audio("Hello!")
102
+ ```
103
+
104
+ ## Voice activity detection
105
+ This uses Silero VAD for voice detection; it runs on the CPU because the model is lightweight enough. The input audio is a numpy array in 512-byte chunks. For best results the audio should be sampled at 16000 Hz (16 kHz). Install it into your project with:
106
+ ```sh
107
+ pip install miscai[vad]
108
+ ```
109
+ Here is an example of how to use it:
110
+ ```python
111
+ from miscai.vad import VAD
112
+
113
+ vad = VAD(threshold=0.5)
114
+ print(vad.is_speech(audio_bytes=audio_bytes))
115
+ ```
116
+
117
+ ## Wakeword detection
118
+ This uses local-wake, so any audio file of a person saying the wakeword can serve as the reference for it. The input is an audio stream with 512-byte chunks (best obtained through SoundDevice). For best results, both the input audio and the reference audio files should be sampled at 16000 Hz (16 kHz). Install it into your project with:
119
+ ```sh
120
+ pip install miscai[wake]
121
+ ```
122
+ Here is an example of how to use it:
123
+ ```python
124
+ from miscai.wakeword import WakeWord
125
+
126
+ wake_word = WakeWord(threshold=0.5, audio_dir="./wakeword")
127
+ print("Begining to wait for wakeword")
128
+ wake_word.waitForWord(callback=awoke, stream=stream)
129
+
130
+ def awoke(detection: dict, stream: sd.InputStream):
131
+ print(f"Wake word detected: {detection['wakeword']}")
132
+ ```
133
+
134
+ ## Final notes
135
+ This project is at quite an early stage and could do with some more polish. It is not meant to be used in production, but it is good for making small prototypes of ideas you may have. I made this to make it easier to integrate LLMs into my other projects, and it grew from there. Changes are welcome, as the documentation is hastily written and the code is arguably worse. Thanks for reading and maybe using this. :)
@@ -0,0 +1,45 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "miscai"
7
+ version = "0.1.0"
8
+ description = "Miscellaneous different AI tools for embedding into projects."
9
+ readme = "README.md"
10
+ requires-python = ">=3.14"
11
+ license = { file = "LICENSE" }
12
+
13
+ # Optional component dependencies
14
+ [project.optional-dependencies]
15
+ dlm = [
16
+ "chromadb>=1.5.2",
17
+ "torch>=2.10.0",
18
+ "transformers>=4.57.3",
19
+ "requests>=2.34.2",
20
+ "peft>=0.19.1",
21
+ ]
22
+ llm = [
23
+ "chromadb>=1.5.2",
24
+ "torch>=2.10.0",
25
+ "transformers>=4.57.3",
26
+ "ollama>=0.6.1",
27
+ "peft>=0.19.1",
28
+ ]
29
+ stt = [
30
+ "numpy>=2.4.6",
31
+ "pywhispercpp>=1.4.1",
32
+ ]
33
+ tts = [
34
+ "numpy>=2.4.6",
35
+ "pocket-tts>=1.1.1",
36
+ "scipy>=1.17.1",
37
+ ]
38
+ vad = [
39
+ "torch>=2.10.0",
40
+ "torchaudio>=2.10.0",
41
+ "numpy>=2.4.6",
42
+ ]
43
+ wake = [
44
+ "local-wake>=0.1.1",
45
+ ]
File without changes
@@ -0,0 +1,195 @@
1
+ import re
2
+ import json
3
+ try:
4
+ import torch
5
+ import chromadb
6
+ import requests
7
+ from transformers import AutoModel
8
+ from chromadb import EmbeddingFunction, Embeddings, Documents
9
+ except:
10
+ raise ImportError("The 'dlm' module is required to use this. Install it with 'pip install miscai[dlm]'.")
11
+
12
class DLM():
    """Chat client for a Mercury diffusion language model with persistent memory.

    Every exchange is posted to the chat-completions endpoint, appended to the
    JSON conversation file, and stored in a persistent Chroma collection so
    that related past exchanges can be added to the system prompt later.
    """

    API_URL = "https://api.inceptionlabs.ai/v1/chat/completions"

    def __init__(self,
                 promt: str,
                 model: str,
                 convo_file: str,
                 api_key: str,
                 tools: tuple = (None, None),
                 think: str = "high",
                 ) -> None:
        """Load the embedding model, open the memory store and restore history.

        Args:
            promt: System prompt sent with every request.
            model: Model name passed to the API.
            convo_file: Path of the JSON file the conversation is persisted in.
            api_key: Bearer token sent in the ``Authorization`` header.
            tools: ``(definitions, functions)`` pair; ``(None, None)`` means
                no tools are available.
            think: Value sent as ``reasoning_effort``.
        """
        self.PROMT = promt
        self.MODEL = model
        self.API_KEY = api_key

        tools_def, tools_fn = tools
        self.tools_def = tools_def
        # Always keep a mapping here: call_tools() does a membership test, which
        # would raise TypeError on None if the model emits a tool call anyway.
        self.tools_fn = tools_fn if tools_fn is not None else {}
        self.think = think
        self.convo_file = convo_file

        self.jina_model = AutoModel.from_pretrained(
            "jinaai/jina-embeddings-v5-text-small",
            trust_remote_code=True,
            dtype=torch.bfloat16,
            attn_implementation="sdpa"
        )
        self.jina_model = self.jina_model.to("cpu")

        chroma_client = chromadb.PersistentClient(path="./memory_db")
        self.collection = chroma_client.get_or_create_collection(
            name="ltm",
            embedding_function=self.JinaEmbeddingFunction(self.jina_model)
        )

        self.query_ef = self.JinaQueryEmbeddingFunction(self.jina_model)

        # A missing, unreadable or corrupt conversation file simply starts a
        # fresh conversation; anything else (e.g. KeyboardInterrupt) propagates.
        try:
            with open(self.convo_file, "r") as file:
                loaded = json.load(file)
        except (OSError, ValueError):
            loaded = []
        self.messages = loaded if isinstance(loaded, list) else []
        self.msg_count = len(self.messages)

    def save_memory(self, text: str, memory_id: str) -> None:
        """Insert or overwrite one document in the memory collection."""
        self.collection.upsert(documents=[text], ids=[memory_id])

    def retrieve_memories(self, query: str, n: int = 3) -> str:
        """Return up to ``n`` stored documents matching ``query``, newline-joined.

        Returns an empty string when the collection yields no documents.
        """
        query_embedding = self.query_ef([query])[0]
        results = self.collection.query(query_embeddings=[query_embedding], n_results=n)
        docs = results["documents"][0]  # type: ignore
        return "\n".join(docs) if docs else ""

    def ask_LLM(self, text: str) -> str:
        """Send ``text`` to the model, run requested tools and return the reply.

        The exchange is saved to the memory collection and the whole
        conversation is written back to ``convo_file``.

        Returns:
            The final reply with any text up to and including a closing
            ``</think>`` tag removed, stripped of surrounding whitespace.
        """
        system_prompt = self.PROMT

        memories = self.retrieve_memories(text)
        if memories:
            system_prompt += f"\n\nRelevant memories:\n{memories}"

        self.messages.append({"role": "user", "content": text})

        # Keep querying until the model answers without requesting a tool.
        while True:
            message = self._request_completion(system_prompt)
            reply = message.get("content") or ""
            self.messages.append(self._serialize_assistant_message(message))
            print(f"\n{reply}\n")

            # The field may be missing entirely when no tool was requested.
            tool_calls = message.get("tool_calls")
            if not tool_calls:
                break
            self.messages.extend(self.call_tools(tool_calls))

        self.msg_count += 1
        self.save_memory(
            f"User: {text}\nAssistant: {reply}",
            memory_id=f"msg_{self.msg_count}"
        )

        with open(self.convo_file, "w") as file:
            json.dump(self.messages, file)

        return re.sub(r".*?</think>", "", reply, flags=re.DOTALL).strip()

    def _request_completion(self, system_prompt: str) -> dict:
        """POST the last ten messages to the API and return the reply message."""
        resp = requests.post(
            self.API_URL,
            headers={
                'Content-Type': 'application/json',
                'Authorization': f'Bearer {self.API_KEY}'
            },
            json={
                "model": self.MODEL,
                "messages": [{"role": "system", "content": system_prompt}] + self.messages[-10:],
                "reasoning_effort": self.think,
                "tools": self.tools_def
            }
        )
        resp_j = resp.json()
        print(f"API response: {resp_j}")
        return resp_j["choices"][0]["message"]

    @staticmethod
    def _parse_arguments(arguments) -> dict:
        """Decode tool-call arguments into a dict.

        Raises:
            ValueError: If the arguments are not valid JSON or do not decode
                to a JSON object.
        """
        if not arguments:
            return {}
        parsed = json.loads(arguments) if isinstance(arguments, str) else arguments
        if not isinstance(parsed, dict):
            raise ValueError(
                f"tool arguments must be a JSON object, got {type(parsed).__name__}"
            )
        return parsed

    def call_tools(self, tool_calls: list) -> list:
        """Execute each requested tool and return the resulting tool messages.

        Failures (unknown tool, undecodable arguments, exception inside the
        tool) are reported in the message content instead of being raised, so
        the model can react to them.
        """
        results = []

        for call in tool_calls:
            fn = call["function"]
            fn_name = fn["name"]

            if fn_name in self.tools_fn:
                try:
                    # Parsed inside the try so malformed JSON produced by the
                    # model becomes a tool error rather than a crash.
                    fn_args = self._parse_arguments(fn.get("arguments"))
                    print(f"CALLING TOOL {fn_name}({fn_args})")
                    out = self.tools_fn[fn_name](**fn_args)
                    content = str(out) if out is not None else "Done."
                except Exception as e:
                    content = f"Error: {e}"
            else:
                content = f"Unknown tool: {fn_name}"

            results.append({
                "role": "tool",
                "content": content,
                "tool_call_id": call.get("id", fn_name)  # call is a dict, use .get()
            })

        return results

    def _serialize_assistant_message(self, message) -> dict:
        """Reduce an API reply message to the JSON-serialisable fields we keep."""
        msg = {"role": "assistant", "content": message.get("content") or ""}
        tool_calls = message.get("tool_calls")
        if tool_calls:
            msg["tool_calls"] = [
                {
                    "id": tc.get("id", tc["function"]["name"]),
                    "function": {
                        "name": tc["function"]["name"],
                        "arguments": tc["function"]["arguments"]  # keep as raw string
                    }
                }
                for tc in tool_calls
            ]
        return msg

    class JinaEmbeddingFunction(EmbeddingFunction):
        """Chroma embedding function that encodes texts as retrieval documents."""

        def __init__(self, model) -> None:
            super().__init__()

            self.jina_model = model

        def __call__(self, input: Documents) -> Embeddings:
            embeddings = self.jina_model.encode(
                texts=input,
                task="retrieval",
                prompt_name="document"
            )
            return embeddings.tolist()

    class JinaQueryEmbeddingFunction(EmbeddingFunction):
        """Chroma embedding function that encodes texts as retrieval queries."""

        def __init__(self, model) -> None:
            super().__init__()

            self.jina_model = model

        def __call__(self, input: Documents) -> Embeddings:
            embeddings = self.jina_model.encode(
                texts=input,
                task="retrieval",
                prompt_name="query"
            )
            return embeddings.tolist()
@@ -0,0 +1,190 @@
1
+ import re
2
+ import json
3
+ try:
4
+ import torch
5
+ import chromadb
6
+ from transformers import AutoModel
7
+ from chromadb import EmbeddingFunction, Embeddings, Documents
8
+ from ollama import chat
9
+ from ollama import ChatResponse
10
+ except:
11
+ raise ImportError("The 'llm' module is required to use this. Install it with 'pip install miscai[llm]'.")
12
+
13
class LLM():
    """Chat client for a local Ollama model with persistent memory.

    Every exchange is sent through ``ollama.chat``, appended to the JSON
    conversation file, and stored in a persistent Chroma collection so that
    related past exchanges can be added to the system prompt later.
    """

    def __init__(self,
                 promt: str,
                 model: str,
                 convo_file: str,
                 tools: tuple = (None, None),
                 think: bool = True,
                 ) -> None:
        """Load the embedding model, open the memory store and restore history.

        Args:
            promt: System prompt sent with every request.
            model: Model name passed to ``ollama.chat``.
            convo_file: Path of the JSON file the conversation is persisted in.
            tools: ``(definitions, functions)`` pair; ``(None, None)`` means
                no tools are available.
            think: Forwarded as the ``think`` argument of ``ollama.chat``.
        """
        self.PROMT = promt
        self.MODEL = model

        tools_def, tools_fn = tools
        self.tools_def = tools_def
        # Always keep a mapping here: call_tools() does a membership test, which
        # would raise TypeError on None if the model emits a tool call anyway.
        self.tools_fn = tools_fn if tools_fn is not None else {}
        self.think = think
        self.convo_file = convo_file

        self.jina_model = AutoModel.from_pretrained(
            "jinaai/jina-embeddings-v5-text-small",
            trust_remote_code=True,
            dtype=torch.bfloat16,
            attn_implementation="sdpa"
        )
        self.jina_model = self.jina_model.to("cpu")

        chroma_client = chromadb.PersistentClient(path="./memory_db")
        self.collection = chroma_client.get_or_create_collection(
            name="ltm",
            embedding_function=self.JinaEmbeddingFunction(self.jina_model)
        )

        self.query_ef = self.JinaQueryEmbeddingFunction(self.jina_model)

        # A missing, unreadable or corrupt conversation file simply starts a
        # fresh conversation; anything else (e.g. KeyboardInterrupt) propagates.
        try:
            with open(self.convo_file, "r") as file:
                loaded = json.load(file)
        except (OSError, ValueError):
            loaded = []
        self.messages = loaded if isinstance(loaded, list) else []
        self.msg_count = len(self.messages)

    def save_memory(self, text: str, memory_id: str) -> None:
        """Insert or overwrite one document in the memory collection."""
        self.collection.upsert(documents=[text], ids=[memory_id])

    def retrieve_memories(self, query: str, n: int = 3) -> str:
        """Return up to ``n`` stored documents matching ``query``, newline-joined.

        Returns an empty string when the collection yields no documents.
        """
        query_embedding = self.query_ef([query])[0]
        results = self.collection.query(query_embeddings=[query_embedding], n_results=n)
        docs = results["documents"][0]  # type: ignore
        return "\n".join(docs) if docs else ""

    def _parse_arguments(self, arguments) -> dict:
        """Ensure tool call arguments are always a dict, never a JSON string."""
        if isinstance(arguments, dict):
            return arguments
        if isinstance(arguments, str):
            try:
                parsed = json.loads(arguments)
                return parsed if isinstance(parsed, dict) else {}
            except (json.JSONDecodeError, ValueError):
                return {}
        try:
            return dict(arguments) if arguments else {}
        except (TypeError, ValueError):
            # Not mapping-like; treat as "no arguments" like other bad input.
            return {}

    def _chat(self, system_prompt: str) -> "ChatResponse":
        """Send the system prompt plus the last ten messages to the model."""
        return chat(
            model=self.MODEL,
            messages=[{"role": "system", "content": system_prompt}] + self.messages[-10:],
            stream=False,
            think=self.think,
            tools=self.tools_def
        )

    def ask_LLM(self, text: str) -> str:
        """Send ``text`` to the model, run requested tools and return the reply.

        The exchange is saved to the memory collection and the whole
        conversation is written back to ``convo_file``.

        Returns:
            The final reply with any text up to and including a closing
            ``</think>`` tag removed, stripped of surrounding whitespace.
        """
        system_prompt = self.PROMT

        memories = self.retrieve_memories(text)
        if memories:
            system_prompt += f"\n\nRelevant memories:\n{memories}"

        self.messages.append({"role": "user", "content": text})

        # Keep querying until the model answers without requesting a tool.
        while True:
            resp = self._chat(system_prompt)
            reply = resp.message.content or ""
            self.messages.append(self._serialize_assistant_message(resp.message))
            print(f"\n{reply}\n")

            if not resp.message.tool_calls:
                break
            self.messages.extend(self.call_tools(resp.message.tool_calls))  # type: ignore

        self.msg_count += 1
        self.save_memory(
            f"User: {text}\nAssistant: {reply}",
            memory_id=f"msg_{self.msg_count}"
        )

        with open(self.convo_file, "w") as file:
            json.dump(self.messages, file)

        return re.sub(r".*?</think>", "", reply, flags=re.DOTALL).strip()

    def call_tools(self, tool_calls: list) -> list:
        """Execute each requested tool and return the resulting tool messages.

        Unknown tools and exceptions raised inside a tool are reported in the
        message content instead of being raised.
        """
        results = []

        for call in tool_calls:
            fn = call.function
            fn_name = fn.name
            fn_args = self._parse_arguments(fn.arguments)

            if fn_name in self.tools_fn:
                print(f"CALLING TOOL {fn_name}({fn_args})")
                try:
                    out = self.tools_fn[fn_name](**fn_args)
                    content = str(out) if out is not None else "Done."
                except Exception as e:
                    content = f"Error: {e}"
            else:
                content = f"Unknown tool: {fn_name}"

            results.append({
                "role": "tool",
                "content": content,
                "tool_call_id": getattr(call, "id", fn_name)
            })

        return results

    def _serialize_assistant_message(self, message) -> dict:
        """Reduce a chat reply message to the JSON-serialisable fields we keep."""
        msg = {"role": "assistant", "content": message.content or ""}
        tool_calls = message.tool_calls
        if tool_calls:
            msg["tool_calls"] = [
                {
                    "id": getattr(tc, "id", tc.function.name),
                    "function": {
                        "name": tc.function.name,
                        # Always store as dict so reloaded messages stay valid
                        "arguments": self._parse_arguments(tc.function.arguments)
                    }
                }
                for tc in tool_calls
            ]
        return msg

    class JinaEmbeddingFunction(EmbeddingFunction):
        """Chroma embedding function that encodes texts as retrieval documents."""

        def __init__(self, model) -> None:
            super().__init__()

            self.jina_model = model

        def __call__(self, input: Documents) -> Embeddings:
            embeddings = self.jina_model.encode(
                texts=input,
                task="retrieval",
                prompt_name="document"
            )
            return embeddings.tolist()

    class JinaQueryEmbeddingFunction(EmbeddingFunction):
        """Chroma embedding function that encodes texts as retrieval queries."""

        def __init__(self, model) -> None:
            super().__init__()

            self.jina_model = model

        def __call__(self, input: Documents) -> Embeddings:
            embeddings = self.jina_model.encode(
                texts=input,
                task="retrieval",
                prompt_name="query"
            )
            return embeddings.tolist()
@@ -0,0 +1,19 @@
1
+ import io
2
+ try:
3
+ import numpy as np
4
+ from pywhispercpp.model import Model
5
+ except:
6
+ raise ImportError("The 'stt' module is required to use this. Install it with 'pip install miscai[stt]'.")
7
+
8
class STT():
    """Speech-to-text helper wrapping a pywhispercpp ``base.en`` model."""

    def __init__(self) -> None:
        """Create the underlying ``Model('base.en')`` instance."""
        self.model = Model('base.en')

    def transcribe(self, audio_bytes: np.ndarray) -> str:
        """Transcribe ``audio_bytes`` and return the text.

        Each segment produced by the model contributes its text followed by a
        newline; with no segments the result is an empty string.
        """
        lines = [segment.text + "\n" for segment in self.model.transcribe(audio_bytes)]
        return "".join(lines)
@@ -0,0 +1,38 @@
1
+ import os
2
+ import importlib.util
3
+
4
class ToolLoader():
    """Discover tool modules in a directory for use with the language models.

    A tool module is a ``.py`` file (not starting with ``_``) that defines a
    ``tool`` dict and a callable named after ``tool["function"]["name"]``.
    """

    def __init__(self, tools_dir: str) -> None:
        """Remember the directory that ``get_tools`` scans."""
        self.tools_dir = tools_dir

    @staticmethod
    def _load_module(module_name: str, filepath: str):
        """Import ``filepath`` as a standalone module and return it.

        Raises:
            ImportError: If no import spec/loader can be created for the file.
            Exception: Whatever the tool file itself raises while executing.
        """
        spec = importlib.util.spec_from_file_location(module_name, filepath)
        if spec is None or spec.loader is None:
            raise ImportError(f"cannot create an import spec for {filepath}")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)
        return module

    def get_tools(self) -> tuple[list, dict]:
        """Load every valid tool in ``tools_dir``.

        Files that fail to import, lack a ``tool`` definition, have a malformed
        definition, or do not define the named callable are reported with a
        printed message and skipped, so one bad file cannot hide the others.

        Returns:
            ``(tools_list, func_map)``: the tool definitions and a mapping from
            function name to the callable implementing it.

        Raises:
            OSError: If ``tools_dir`` cannot be listed.
        """
        tools_list = []
        func_map = {}

        # Sorted so the order handed to the model is deterministic;
        # os.listdir() makes no ordering guarantee.
        for filename in sorted(os.listdir(self.tools_dir)):
            if not filename.endswith(".py") or filename.startswith("_"):
                continue

            module_name = filename[:-3]
            filepath = os.path.join(self.tools_dir, filename)

            try:
                module = self._load_module(module_name, filepath)
            except Exception as e:
                print(f"TOOL: {filename} FAILED TO LOAD: {e}")
                continue

            if not hasattr(module, "tool"):
                print(f"TOOL: {filename} DOESN'T HAVE TOOL DEFINITION!")
                continue

            tool_def = module.tool
            try:
                fn_name = tool_def["function"]["name"]
            except (KeyError, TypeError):
                print(f"TOOL: {filename} HAS A MALFORMED TOOL DEFINITION!")
                continue

            if not isinstance(fn_name, str) or not callable(getattr(module, fn_name, None)):
                print(f"TOOL {filename}'S FUNCTION DOESN'T MATCH DEFINITION!")
                continue

            tools_list.append(tool_def)
            func_map[fn_name] = getattr(module, fn_name)
            print(f"LOADED {filename} SUCCESSFULLY!")

        return tools_list, func_map
@@ -0,0 +1,22 @@
1
+ import math
2
+ try:
3
+ import numpy as np
4
+ from pocket_tts import TTSModel
5
+ from scipy.signal import resample_poly
6
+ except:
7
+ raise ImportError("The 'tts' module is required to use this. Install it with 'pip install miscai[tts]'.")
8
+
9
class TTS():
    """Voice-cloning text-to-speech built on a pocket-tts ``TTSModel``."""

    def __init__(self, voice_base_file: str) -> None:
        """Load the model and derive the voice state from ``voice_base_file``."""
        self.tts_model = TTSModel.load_model()
        self.voice_state = self.tts_model.get_state_for_audio_prompt(
            voice_base_file
        )

    def get_audio(self, text: str, sample_rate: int = 44100) -> tuple[np.ndarray, int]:
        """Synthesise ``text`` and resample it to ``sample_rate``.

        Args:
            text: The text to speak.
            sample_rate: Output sample rate in Hz. Defaults to 44100, the rate
                this method always produced before the parameter existed.

        Returns:
            ``(audio, sample_rate)`` where ``audio`` is a float32 numpy array.

        Raises:
            ValueError: If ``sample_rate`` is not positive.
        """
        if sample_rate <= 0:
            raise ValueError(f"sample_rate must be positive, got {sample_rate}")

        audio = self.tts_model.generate_audio(self.voice_state, text)

        # Reduce the rate ratio to lowest terms so resample_poly gets the
        # smallest possible up/down factors.
        source_rate = self.tts_model.sample_rate
        gcd = math.gcd(source_rate, sample_rate)
        audio_resamp = resample_poly(
            audio.numpy(), sample_rate // gcd, source_rate // gcd
        ).astype(np.float32)

        return audio_resamp, sample_rate
@@ -0,0 +1,24 @@
1
+ try:
2
+ import torch
3
+ import numpy as np
4
+ except:
5
+ raise ImportError("The 'vad' module is required to use this. Install it with 'pip install miscai[vad]'.")
6
+
7
class VAD():
    """Voice activity detector backed by the Silero VAD model from torch.hub."""

    def __init__(self, threshold: float) -> None:
        """Load the model and store the decision threshold.

        Args:
            threshold: Probability above which a chunk counts as speech.
        """
        self.THRESHOLD = threshold
        self.SAMPLING_RATE = 16000
        self.CHUNK = 512

        loaded = torch.hub.load(repo_or_dir='snakers4/silero-vad', model='silero_vad')  # type: ignore
        self.model, self.utils = loaded

    def is_speech(self, audio_bytes: np.ndarray) -> bool:
        """Return True when the model's speech probability exceeds the threshold.

        The input is flattened and converted to float32 before being handed to
        the model together with ``SAMPLING_RATE``.
        """
        samples = torch.from_numpy(audio_bytes.flatten().astype(np.float32))
        probability = self.model(samples, self.SAMPLING_RATE).item()
        return bool(probability > self.THRESHOLD)
@@ -0,0 +1,12 @@
1
+ try:
2
+ import lwake
3
+ except:
4
+ raise ImportError("The 'wake' module is required to use this. Install it with 'pip install miscai[wake]'.")
5
+
6
class WakeWord():
    """Thin wrapper around ``lwake.listen`` for wake-word detection."""

    def __init__(self, threshold: float, audio_dir: str) -> None:
        """Store the detection threshold and the reference-audio directory."""
        self.REF_DIR = audio_dir
        self.THRESHOLD = threshold

    def waitForWord(self, callback, stream = None ) -> None:
        """Listen via ``lwake.listen`` using the "embedding" method.

        Args:
            callback: Forwarded to ``lwake.listen`` as ``callback``.
            stream: Forwarded to ``lwake.listen`` as ``stream``.
        """
        listen_options = {
            "threshold": self.THRESHOLD,
            "method": "embedding",
            "callback": callback,
            "stream": stream,
        }
        lwake.listen(self.REF_DIR, **listen_options)