davidkhala.ai 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- davidkhala/ai/agent/README.md +7 -0
- davidkhala/ai/agent/dify/__init__.py +9 -0
- davidkhala/ai/agent/dify/base.py +7 -0
- davidkhala/ai/agent/dify/knowledge.py +163 -0
- davidkhala/ai/agent/ragflow.py +8 -0
- davidkhala/ai/huggingface/inference.py +13 -0
- davidkhala/ai/openai/__init__.py +3 -2
- davidkhala/ai/openai/azure.py +8 -2
- davidkhala/ai/openai/native.py +10 -0
- {davidkhala_ai-0.1.0.dist-info → davidkhala_ai-0.1.2.dist-info}/METADATA +8 -1
- {davidkhala_ai-0.1.0.dist-info → davidkhala_ai-0.1.2.dist-info}/RECORD +12 -6
- {davidkhala_ai-0.1.0.dist-info → davidkhala_ai-0.1.2.dist-info}/WHEEL +0 -0
davidkhala/ai/agent/dify/knowledge.py
ADDED
@@ -0,0 +1,163 @@
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Iterable, TypedDict, Callable, Any, Optional
+from urllib.parse import urlparse
+
+import requests
+
+from davidkhala.ai.agent.dify.base import API
+
+
+class DatasetDict(TypedDict):
+    id: str
+    name: str
+    description: str
+    provider: str
+    permission: str
+    data_source_type: str
+    indexing_technique: str
+    doc_form: str
+    runtime_mode: str
+    is_published: bool
+    enable_api: bool
+    # stats
+    app_count: int
+    document_count: int
+    word_count: int
+    total_documents: int
+    total_available_documents: int
+    # embedding
+    embedding_available: bool
+    embedding_model: str
+    embedding_model_provider: str
+    retrieval_model_dict: dict
+    external_retrieval_model: dict
+    external_knowledge_info: dict
+
+
+class Iterator(Iterable):
+    def __iter__(self):
+        return self
+
+    def __init__(self, get_fn: Callable[[int, int], Any], r: Optional[dict]):
+        self.response = r
+        self.fn = get_fn
+
+    def __next__(self):
+        if self.response and not self.response['has_more']:
+            raise StopIteration
+        page = 1 if not self.response else self.response['page'] + 1
+        limit = None if not self.response else self.response['limit']
+        self.response = self.fn(page, limit)
+        return self.response['data']
+
+
+class DocumentDict(TypedDict):
+    id: str
+    position: int
+    data_source_type: str
+    data_source_info: dict[str, str]
+    data_source_detail_dict: dict[str, dict]
+    dataset_process_rule_id: str
+    name: str
+    created_from: str
+    created_by: str
+    created_at: int
+    tokens: int
+    indexing_status: str
+    error: str
+    enabled: bool
+    archived: bool
+    display_status: str
+    word_count: int
+    hit_count: int
+    doc_form: str
+    doc_metadata: dict
+    disabled_at: int
+    disabled_by: str
+
+
+class Dataset(API):
+    def __init__(self, api_key: str, base_url="https://api.dify.ai/v1"):
+        super().__init__(api_key, f"{base_url}/datasets")
+
+    def paginate_datasets(self, page=1, size=20):
+        r = self.request(self.base_url, "GET", params={
+            'page': page,
+            'limit': size,
+        })
+        return r
+
+    def list_datasets(self) -> Iterable[list[DatasetDict]]:
+        return Iterator(self.paginate_datasets, None)
+
+    @property
+    def ids(self):
+        for sub_list in self.list_datasets():
+            for dataset in sub_list:
+                yield dataset['id']
+
+    class Instance(API):
+        def __init__(self, d: Dataset, dataset_id: str):
+            super().__init__(d.api_key, f"{d.base_url}/{dataset_id}")
+
+        def get(self):
+            return self.request(self.base_url, "GET")
+
+        def upload(self, filename, *, path=None, url=None, document_id=None):
+            """
+            don't work for html
+            work for markdown
+            TODO how to simulate console
+            """
+            files = {}
+            if path:
+                with open(path, 'rb') as f:
+                    content = f.read()
+                if not filename:
+                    filename = os.path.basename(path)
+            elif url:
+                r = requests.get(url)
+                r.raise_for_status()
+                if not filename:
+                    parsed_url = urlparse(url)
+                    filename = Path(parsed_url.path).name
+                content = r.content
+            files['file'] = (filename, content)
+            if document_id:
+                # don't work for html
+                r = requests.post(f"{self.base_url}/documents/{document_id}/update-by-file", files=files,
+                                  **self.options)
+            else:
+                r = requests.post(f"{self.base_url}/document/create-by-file", files=files, **self.options)
+            r = self.on_response(r)
+            return r['document']
+
+        def paginate_documents(self, page=1, size=20):
+            return self.request(f"{self.base_url}/documents", "GET", params={
+                'page': page,
+                'limit': size
+            })
+
+        def list_documents(self) -> Iterable[list[DocumentDict]]:
+            return Iterator(self.paginate_documents, None)
+
+        def has_document(self, name) -> bool:
+            return any(name == item['name'] for row in self.list_documents() for item in row)
+
+
+class Document(API):
+    def __init__(self, d: Dataset.Instance, document_id: str):
+        super().__init__(d.api_key, f"{d.base_url}/documents/{document_id}")
+        try:
+            self.get()
+            self.exist = True
+        except requests.exceptions.HTTPError as e:
+            if e.response.status_code == 404:
+                self.exist = False
+            raise e
+
+    def get(self):
+        return self.request(self.base_url, "GET")
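The new `knowledge.py` wraps Dify's knowledge-base REST endpoints in three classes: `Dataset` pages through datasets, the nested `Dataset.Instance` binds to one dataset for uploads and document listing, and `Document` probes a single document. A minimal usage sketch follows; the API key, file path, and the assumption that `upload()`'s returned document dict carries an `id` are illustrative only, and the `API`/`request` helpers live in `dify/base.py`, which this page does not show.

```python
# Sketch only: DIFY_API_KEY and notes.md are placeholders, not values from this package.
import os

from davidkhala.ai.agent.dify.knowledge import Dataset, Document

dataset = Dataset(os.environ["DIFY_API_KEY"])  # base_url defaults to https://api.dify.ai/v1

# list_datasets() yields one page (a list of DatasetDict) per iteration until has_more is False
for page in dataset.list_datasets():
    for d in page:
        print(d["id"], d["name"])

# bind to the first dataset and upload a markdown file (per the docstring, HTML does not work)
instance = Dataset.Instance(dataset, next(dataset.ids))
uploaded = instance.upload("notes.md", path="./notes.md")
print(instance.has_document("notes.md"))

# Document() calls get() on construction and re-raises HTTP errors (flagging exist=False on a 404)
doc = Document(instance, uploaded["id"])
print(doc.exist, doc.get())
```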
davidkhala/ai/huggingface/inference.py
ADDED
@@ -0,0 +1,13 @@
+from huggingface_hub import InferenceApi
+
+
+class API:
+    def __init__(self, token):
+        self.inference = None
+        self.token = token
+
+    def as_model(self, repo_id):
+        self.inference = InferenceApi(repo_id=repo_id, token=self.token)
+
+    def call(self, **kwargs):
+        return self.inference(**kwargs)
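`inference.py` is a thin wrapper over `huggingface_hub.InferenceApi`: `as_model()` binds a repo, and `call()` forwards its keyword arguments to the underlying inference call. A hedged usage sketch, with the token variable and repo id as placeholders:

```python
import os

from davidkhala.ai.huggingface.inference import API

api = API(os.environ["HF_TOKEN"])        # placeholder env var holding a Hugging Face token
api.as_model("BAAI/bge-small-en-v1.5")   # placeholder repo id; any model served by the Inference API
print(api.call(inputs="hello world"))    # kwargs go straight to InferenceApi.__call__
```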
davidkhala/ai/openai/__init__.py
CHANGED
@@ -22,7 +22,7 @@ class Client(AbstractClient):
         )
         return [item.embedding for item in response.data]
 
-    def chat(self, *user_prompt):
+    def chat(self, *user_prompt, **kwargs):
 
         messages = [
             *self.messages,
@@ -47,7 +47,8 @@ class Client(AbstractClient):
         response = self.client.chat.completions.create(
             model=self.model,
             messages=messages,
-            n=self.n
+            n=self.n,
+            **kwargs
         )
         contents = [choice.message.content for choice in response.choices]
         assert len(contents) == self.n
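This change lets callers forward arbitrary keyword arguments through `Client.chat()` into `chat.completions.create`. The sketch below shows the call those kwargs ultimately reach, written against the OpenAI SDK directly; the model name and prompt are placeholders, and the client construction stands in for whatever the concrete `Client` subclass does.

```python
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
response = client.chat.completions.create(
    model="gpt-4o-mini",                                   # placeholder model
    messages=[{"role": "user", "content": "Say hello"}],
    n=1,
    temperature=0,    # the kind of keyword Client.chat(*prompt, **kwargs) can now pass through
)
print(response.choices[0].message.content)
```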
davidkhala/ai/openai/azure.py
CHANGED
@@ -5,7 +5,13 @@ from openai import AzureOpenAI, OpenAI
 from davidkhala.ai.openai import Client
 
 
-class ModelDeploymentClient(Client):
+class AzureHosted(Client):
+    def chat(self, *user_prompt, **kwargs):
+        if 'web_search_options' in kwargs:
+            raise ValueError('Web search options not supported in any models of Azure AI Foundry')
+        return super().chat(*user_prompt, **kwargs)
+
+class ModelDeploymentClient(AzureHosted):
     def __init__(self, key, deployment):
         self.client = AzureOpenAI(
             api_version="2024-12-01-preview",  # mandatory
@@ -15,7 +21,7 @@ class ModelDeploymentClient(Client):
 
 
 @warnings.deprecated("Azure Open AI is deprecated. Please migrate to Azure AI Foundry")
-class OpenAIClient(Client):
+class OpenAIClient(AzureHosted):
 
     def __init__(self, api_key, project):
         self.client = OpenAI(
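`AzureHosted` now sits between `Client` and both Azure clients purely to reject `web_search_options`, which no Azure AI Foundry model supports. A sketch of the guard; the key and deployment values are placeholders, and any endpoint configuration not visible in this hunk is assumed to be handled inside `ModelDeploymentClient` or via environment variables.

```python
from davidkhala.ai.openai.azure import ModelDeploymentClient

client = ModelDeploymentClient("<azure-api-key>", "<deployment-name>")  # placeholder credentials
try:
    client.chat("hello", web_search_options={"search_context_size": "low"})
except ValueError as err:
    print(err)  # Web search options not supported in any models of Azure AI Foundry
```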
davidkhala/ai/openai/native.py
CHANGED
@@ -1,3 +1,5 @@
+from typing import Optional, Literal
+
 from openai import OpenAI
 
 from davidkhala.ai.openai import Client
@@ -10,3 +12,11 @@ class NativeClient(Client):
             base_url=base_url
         )
 
+    def chat(self, *user_prompt, web_search: Optional[Literal["low", "medium", "high"]] = None, **kwargs):
+        opts = {
+            **kwargs
+        }
+        if web_search:
+            from openai.types.chat.completion_create_params import WebSearchOptions
+            opts['web_search_options'] = WebSearchOptions(search_context_size=web_search)
+        return super().chat(*user_prompt, **opts)
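`NativeClient.chat()` gains a `web_search` keyword that is translated into the SDK's `WebSearchOptions(search_context_size=...)` before delegating to the base `chat()`. A sketch of the new keyword; only the `chat()` signature comes from this hunk, so the constructor call below is hypothetical.

```python
from davidkhala.ai.openai.native import NativeClient

client = NativeClient("<openai-api-key>")   # hypothetical construction; __init__ is not shown in this diff
reply = client.chat(
    "What changed in davidkhala.ai 0.1.2?",  # placeholder prompt
    web_search="low",                        # becomes web_search_options={"search_context_size": "low"}
)
print(reply)
```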
{davidkhala_ai-0.1.0.dist-info → davidkhala_ai-0.1.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: davidkhala.ai
-Version: 0.1.0
+Version: 0.1.2
 Summary: misc AI modules
 Requires-Python: >=3.13
 Provides-Extra: ali
@@ -21,5 +21,12 @@ Provides-Extra: langchain
 Requires-Dist: langchain; extra == 'langchain'
 Requires-Dist: langchain-openai; extra == 'langchain'
 Requires-Dist: langgraph; extra == 'langchain'
+Provides-Extra: ragflow
+Requires-Dist: ragflow-sdk; extra == 'ragflow'
 Provides-Extra: telemetry
 Requires-Dist: opik; (python_version < '3.14') and extra == 'telemetry'
+Description-Content-Type: text/markdown
+
+# davidkhala.ai
+
+For usage of `azure.ai.agents`, goto https://github.com/davidkhala/azure-utils/tree/main/py
{davidkhala_ai-0.1.0.dist-info → davidkhala_ai-0.1.2.dist-info}/RECORD
CHANGED
@@ -1,8 +1,13 @@
 davidkhala/ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 davidkhala/ai/model.py,sha256=1wcXC8X8oqerMatlcPbZmuxZ-nJWdJKmaDSDgiGlUGw,647
 davidkhala/ai/opik.py,sha256=YU1XuweMUAzUkhpjxhltt-SBBDBkR3z-PCNo0DqzBRs,39
+davidkhala/ai/agent/README.md,sha256=kIPsx3gOjrpOw7w2qhNEALuCEQkuh4nYp6uBnijdvHE,178
 davidkhala/ai/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 davidkhala/ai/agent/langgraph.py,sha256=jrc_Yvgo7eJjd3y5UJn0t1FzpnObDGYscwgsuVl2O_I,1052
+davidkhala/ai/agent/ragflow.py,sha256=UaK31us6V0NhAPCthGo07rQsm72vlR-McmihC_NDe1g,273
+davidkhala/ai/agent/dify/__init__.py,sha256=GNgNwwX75NGBNsyKnA_Qz63QnbwOwF4nKd5fhHwdcKM,246
+davidkhala/ai/agent/dify/base.py,sha256=V4BIhG9oTg9ayPZnYVxxA1Yobl2C4ukSWEOu2U_l0Tw,259
+davidkhala/ai/agent/dify/knowledge.py,sha256=FH-aSMx3EG7uXYBPL0olbOKfgeYEwkgL5SHYFsPSsKU,4980
 davidkhala/ai/ali/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 davidkhala/ai/ali/dashscope.py,sha256=SZIzRhVHlLx3s5I2RNUh2-u8OoSdrbvoN5e1k8Mh8N0,1943
 davidkhala/ai/api/__init__.py,sha256=q2Ro5nhW5kJx2CYR1MRVamjTT5tTexPZwhrS2hwAvFM,1319
@@ -13,9 +18,10 @@ davidkhala/ai/google/adk.py,sha256=QwxYoOzT2Hol03V4NM0PF_HAzUGb4fB18VUAYacYbAY,6
 davidkhala/ai/google/gemini.py,sha256=Xf4HDOOcK4-jEBERzuLnQNFsU61P2fFx4K0z-ijvNHE,214
 davidkhala/ai/huggingface/BAAI.py,sha256=LZ9kp5Gfql4UzuTn4osyekI6VV1H3RIfED2IolXFj5c,341
 davidkhala/ai/huggingface/__init__.py,sha256=FJyU8eOfWQWKAvkIa5qwubF9ghsSQ8C0e6p6DKyomgs,521
-davidkhala/ai/
-davidkhala/ai/openai/
-davidkhala/ai/openai/
-
-davidkhala_ai-0.1.
-davidkhala_ai-0.1.
+davidkhala/ai/huggingface/inference.py,sha256=bYN0PtLF2CaIHzdTP4LaTALJhcawvuLnLR7rhMVqwDE,333
+davidkhala/ai/openai/__init__.py,sha256=GXzWaw2ER3YFGHG6TPD9SmAHV6Tpsnqxj6tXlaWsrko,1897
+davidkhala/ai/openai/azure.py,sha256=QR1uZj8qAyhpCjo3Ks5zNV8GfOp3-enyZs6fBvV-MkA,1110
+davidkhala/ai/openai/native.py,sha256=MB0nDnzCOj_M42RMhdK3HTMVnxGnwpLT2GeLwSrepwI,704
+davidkhala_ai-0.1.2.dist-info/METADATA,sha256=QHtyUo2dYlHTg2hpLKgVLGbDNg_z4fTpLesFKUCCxCY,1098
+davidkhala_ai-0.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+davidkhala_ai-0.1.2.dist-info/RECORD,,

{davidkhala_ai-0.1.0.dist-info → davidkhala_ai-0.1.2.dist-info}/WHEEL
File without changes