davidkhala.ai 0.0.9__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/PKG-INFO +8 -1
  2. davidkhala_ai-0.1.1/README.md +3 -0
  3. davidkhala_ai-0.1.1/davidkhala/ai/agent/README.md +7 -0
  4. davidkhala_ai-0.1.1/davidkhala/ai/agent/dify/__init__.py +9 -0
  5. davidkhala_ai-0.1.1/davidkhala/ai/agent/dify/base.py +7 -0
  6. davidkhala_ai-0.1.1/davidkhala/ai/agent/dify/knowledge.py +156 -0
  7. davidkhala_ai-0.1.1/davidkhala/ai/agent/ragflow.py +8 -0
  8. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/api/siliconflow.py +4 -3
  9. davidkhala_ai-0.1.1/davidkhala/ai/huggingface/inference.py +13 -0
  10. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/openai/__init__.py +3 -2
  11. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/openai/azure.py +8 -2
  12. davidkhala_ai-0.1.1/davidkhala/ai/openai/native.py +22 -0
  13. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/pyproject.toml +2 -1
  14. davidkhala_ai-0.0.9/README.md +0 -0
  15. davidkhala_ai-0.0.9/davidkhala/ai/openai/native.py +0 -12
  16. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/.gitignore +0 -0
  17. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/__init__.py +0 -0
  18. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/agent/__init__.py +0 -0
  19. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/agent/langgraph.py +0 -0
  20. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/ali/__init__.py +0 -0
  21. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/ali/dashscope.py +0 -0
  22. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/api/__init__.py +0 -0
  23. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/api/openrouter.py +0 -0
  24. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/google/__init__.py +0 -0
  25. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/google/adk.py +0 -0
  26. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/google/gemini.py +0 -0
  27. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/huggingface/BAAI.py +0 -0
  28. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/huggingface/__init__.py +0 -0
  29. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/model.py +0 -0
  30. {davidkhala_ai-0.0.9 → davidkhala_ai-0.1.1}/davidkhala/ai/opik.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: davidkhala.ai
3
- Version: 0.0.9
3
+ Version: 0.1.1
4
4
  Summary: misc AI modules
5
5
  Requires-Python: >=3.13
6
6
  Provides-Extra: ali
@@ -21,5 +21,12 @@ Provides-Extra: langchain
21
21
  Requires-Dist: langchain; extra == 'langchain'
22
22
  Requires-Dist: langchain-openai; extra == 'langchain'
23
23
  Requires-Dist: langgraph; extra == 'langchain'
24
+ Provides-Extra: ragflow
25
+ Requires-Dist: ragflow-sdk; extra == 'ragflow'
24
26
  Provides-Extra: telemetry
25
27
  Requires-Dist: opik; (python_version < '3.14') and extra == 'telemetry'
28
+ Description-Content-Type: text/markdown
29
+
30
+ # davidkhala.ai
31
+
32
+ For usage of `azure.ai.agents`, see https://github.com/davidkhala/azure-utils/tree/main/py
@@ -0,0 +1,3 @@
1
+ # davidkhala.ai
2
+
3
+ For usage of `azure.ai.agents`, see https://github.com/davidkhala/azure-utils/tree/main/py
@@ -0,0 +1,7 @@
1
+
2
+ # ragflow python sdk
3
+
4
+ Beyond the official `ragflow-sdk` package, these community clients are worth highlighting:
5
+ - [ragflow-client](https://pypi.org/project/ragflow-client/)
6
+
7
+
@@ -0,0 +1,9 @@
1
+ from datetime import datetime
2
+
3
+
4
+ def with_datetime(filename:str):
5
+ if ' ' in filename:
6
+ raise ValueError(f"Filename '{filename}' should not contain spaces.")
7
+ return f"{datetime.now().strftime("%Y%m%d_%H%M%S")}.{filename}"
8
+
9
+
@@ -0,0 +1,7 @@
1
+ from davidkhala.utils.http_request import Request
2
+
3
class API(Request):
    """Base HTTP client that authenticates with a bearer API key."""

    def __init__(self, api_key: str, base_url="https://api.dify.ai/v1"):
        """
        :param api_key: key passed to ``Request`` as the ``bearer`` credential
        :param base_url: URL prefix kept on the instance for building endpoints
        """
        credentials = {'bearer': api_key}
        super().__init__(credentials)
        self.base_url, self.api_key = base_url, api_key
@@ -0,0 +1,156 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Iterable, TypedDict, Callable
6
+ from urllib.parse import urlparse
7
+
8
+ import requests
9
+
10
+ from davidkhala.ai.agent.dify.base import API
11
+
12
+
13
class DatasetDict(TypedDict):
    """Typed shape of a single dataset record.

    NOTE(review): presumably mirrors the dataset JSON returned by the Dify
    datasets endpoints — confirm against an actual API response.
    """
    # identity and configuration
    id: str
    name: str
    description: str
    provider: str
    permission: str
    data_source_type: str
    indexing_technique: str
    doc_form: str
    runtime_mode: str
    is_published: bool
    enable_api: bool
    # stats
    app_count: int
    document_count: int
    word_count: int
    total_documents: int
    total_available_documents: int
    # embedding
    embedding_available: bool
    embedding_model: str
    embedding_model_provider: str
    # retrieval settings; nested structure is not typed here
    retrieval_model_dict: dict
    external_retrieval_model: dict
    external_knowledge_info: dict
38
+
39
+
40
class Iterator(Iterable):
    """Page-by-page iterator over a paginated listing.

    Every step calls ``get_fn`` with no arguments, remembers the response and
    yields its ``'data'`` entry. Iteration stops once the remembered response
    is truthy and reports a falsy ``'has_more'``.

    NOTE(review): ``get_fn`` receives no page argument, so it must advance its
    own position between calls — confirm the callables passed in do so.
    """

    def __init__(self, get_fn: Callable, r):
        """
        :param get_fn: zero-argument callable returning a mapping with
            ``'data'`` and ``'has_more'`` keys
        :param r: an already fetched response, or ``None`` to start fresh
        """
        self.fn = get_fn
        self.response = r

    def __iter__(self):
        return self

    def __next__(self):
        previous = self.response
        exhausted = bool(previous) and not previous['has_more']
        if exhausted:
            raise StopIteration
        current = self.fn()
        self.response = current
        return current['data']
+
54
+
55
class DocumentDict(TypedDict):
    """Typed shape of a single document record inside a dataset.

    NOTE(review): presumably mirrors the document JSON returned by the Dify
    documents endpoints — confirm against an actual API response.
    """
    id: str
    position: int
    # where the document came from
    data_source_type: str
    data_source_info: dict[str, str]
    data_source_detail_dict: dict[str, dict]
    dataset_process_rule_id: str
    name: str
    created_from: str
    created_by: str
    created_at: int  # NOTE(review): looks like an epoch timestamp — confirm unit
    tokens: int
    # processing state
    indexing_status: str
    error: str
    enabled: bool
    archived: bool
    display_status: str
    # usage counters
    word_count: int
    hit_count: int
    doc_form: str
    doc_metadata: dict
    disabled_at: int
    disabled_by: str
78
+
79
+
80
def _paged(fetch: Callable[[int], dict]) -> Callable[[], dict]:
    """Wrap *fetch* so that successive zero-argument calls request page 1, 2, 3, ..."""
    page = 0

    def fetch_next():
        nonlocal page
        page += 1
        return fetch(page)

    return fetch_next


class Dataset(API):
    """Client for the ``<base_url>/datasets`` collection."""

    def __init__(self, api_key: str, base_url="https://api.dify.ai/v1"):
        """
        :param api_key: bearer API key
        :param base_url: service root; ``/datasets`` is appended to it
        """
        super().__init__(api_key, f"{base_url}/datasets")

    def list(self, page: int = 1):
        """Fetch one page of datasets.

        :param page: 1-based page number sent as the ``page`` query parameter
        :return: the decoded response (callers read ``data`` and ``has_more``)
        """
        return self.request(f"{self.base_url}?page={page}", "GET")

    def list_all(self) -> Iterable[DatasetDict]:
        """Iterate over every page of datasets; each element is one page's ``data``."""
        # The pager advances the page number on each call; without it the
        # first page would be fetched again for as long as `has_more` is set.
        return Iterator(_paged(self.list), None)

    @property
    def ids(self):
        """Yield the ``id`` of every dataset across all pages."""
        for sub_list in self.list_all():
            for dataset in sub_list:
                yield dataset['id']

    class Instance(API):
        """Client bound to one dataset, addressed as ``<datasets url>/<dataset_id>``."""

        def __init__(self, d: Dataset, dataset_id: str):
            """
            :param d: parent client supplying the API key and datasets URL
            :param dataset_id: identifier appended to the datasets URL
            """
            super().__init__(d.api_key, f"{d.base_url}/{dataset_id}")

        def get(self):
            """Fetch this dataset."""
            return self.request(self.base_url, "GET")

        def upload(self, filename, *, path=None, url=None, document_id=None):
            """Create a document from a file, or update one when *document_id* is given.

            Known limitation (from the original author): does not work for
            HTML, works for Markdown.
            TODO: how to simulate the console upload.

            :param filename: name to upload under; when falsy it is derived
                from *path* or from the path component of *url*
            :param path: local file to read; takes precedence over *url*
            :param url: remote file to download when *path* is not given
            :param document_id: existing document to update instead of creating
            :return: the ``document`` entry of the response
            :raises ValueError: if neither *path* nor *url* is provided
            """
            if path:
                with open(path, 'rb') as f:
                    content = f.read()
                if not filename:
                    filename = os.path.basename(path)
            elif url:
                r = requests.get(url)
                r.raise_for_status()
                if not filename:
                    parsed_url = urlparse(url)
                    filename = Path(parsed_url.path).name
                content = r.content
            else:
                # Previously this fell through to an unbound `content`.
                raise ValueError("Either 'path' or 'url' is required.")
            files = {'file': (filename, content)}
            if document_id:
                # don't work for html
                r = requests.post(f"{self.base_url}/documents/{document_id}/update-by-file", files=files,
                                  **self.options)
            else:
                r = requests.post(f"{self.base_url}/document/create-by-file", files=files, **self.options)
            r = self.on_response(r)
            return r['document']

        def list(self, page: int = 1):
            """Fetch one page of this dataset's documents.

            :param page: 1-based page number sent as the ``page`` query parameter
            """
            return self.request(f"{self.base_url}/documents?page={page}", "GET")

        def list_documents(self) -> Iterable[DocumentDict]:
            """Iterate over every page of documents; each element is one page's ``data``."""
            return Iterator(_paged(self.list), None)

        def has_document(self, name) -> bool:
            """Return whether any document in this dataset is named *name*."""
            return any(name == item['name'] for row in self.list_documents() for item in row)
140
+
141
+
142
class Document(API):
    """Client bound to one document, addressed as ``<dataset url>/documents/<document_id>``."""

    def __init__(self, d: Dataset.Instance, document_id: str):
        """
        :param d: dataset client supplying the API key and dataset URL
        :param document_id: identifier appended under ``/documents/``
        """
        document_url = f"{d.base_url}/documents/{document_id}"
        super().__init__(d.api_key, document_url)

    def get(self):
        """Fetch this document."""
        return self.request(self.base_url, "GET")

    def exist(self):
        """Return ``True`` if fetching succeeds and ``False`` on HTTP 404.

        Any other ``HTTPError`` is re-raised.
        """
        try:
            self.get()
        except requests.exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise e
            return False
        else:
            return True
@@ -0,0 +1,8 @@
1
+ from ragflow_sdk import RAGFlow
2
class Client:
    """Thin wrapper that owns a ``RAGFlow`` SDK client."""

    def __init__(self, api_key: str, base_url="http://localhost:9380/"):
        """
        :param api_key: key forwarded to ``RAGFlow``
        :param base_url: server address forwarded to ``RAGFlow``
        """
        sdk_options = {'api_key': api_key, 'base_url': base_url}
        self.client = RAGFlow(**sdk_options)

    @property
    def datasets(self):
        """Result of ``list_datasets()`` on the wrapped client, fetched on every access."""
        sdk = self.client
        return sdk.list_datasets()
@@ -48,7 +48,7 @@ class SiliconFlow(API):
48
48
  response = self.request(f"{self.base_url}/embeddings", "POST", json=json)
49
49
  return [_['embedding'] for _ in response['data']]
50
50
 
51
- def which(self, query: str, documents: list[str], **kwargs):
51
+ def which(self, query: str, documents: list[str], **kwargs)->tuple[str,int]:
52
52
  json = {
53
53
  'model': self.model,
54
54
  'query': query,
@@ -56,6 +56,7 @@ class SiliconFlow(API):
56
56
  **kwargs
57
57
  }
58
58
  response = self.request(f"{self.base_url}/rerank", "POST", json=json)
59
- most_relevant = max(response['results'], key=lambda x: x['relevance_score'])
60
- return documents[most_relevant['index']]
59
+ most_relevant_index = max(response['results'], key=lambda x: x['relevance_score'])['index']
60
+
61
+ return documents[most_relevant_index], most_relevant_index
61
62
 
@@ -0,0 +1,13 @@
1
+ from huggingface_hub import InferenceApi
2
+
3
+
4
class API:
    """Holds a Hugging Face token and an inference handle for one model repo."""

    def __init__(self, token):
        """
        :param token: access token passed to ``InferenceApi`` in :meth:`as_model`
        """
        self.inference = None
        self.token = token

    def as_model(self, repo_id):
        """Bind this instance to *repo_id* by creating its ``InferenceApi`` handle."""
        self.inference = InferenceApi(repo_id=repo_id, token=self.token)

    def call(self, **kwargs):
        """Invoke the bound inference handle with *kwargs* and return its result.

        :raises RuntimeError: if :meth:`as_model` has not been called yet
        """
        if self.inference is None:
            # Without this guard the failure is "'NoneType' object is not callable".
            raise RuntimeError("No model selected; call as_model(repo_id) first.")
        return self.inference(**kwargs)
@@ -22,7 +22,7 @@ class Client(AbstractClient):
22
22
  )
23
23
  return [item.embedding for item in response.data]
24
24
 
25
- def chat(self, *user_prompt):
25
+ def chat(self, *user_prompt, **kwargs):
26
26
 
27
27
  messages = [
28
28
  *self.messages,
@@ -47,7 +47,8 @@ class Client(AbstractClient):
47
47
  response = self.client.chat.completions.create(
48
48
  model=self.model,
49
49
  messages=messages,
50
- n=self.n
50
+ n=self.n,
51
+ **kwargs
51
52
  )
52
53
  contents = [choice.message.content for choice in response.choices]
53
54
  assert len(contents) == self.n
@@ -5,7 +5,13 @@ from openai import AzureOpenAI, OpenAI
5
5
  from davidkhala.ai.openai import Client
6
6
 
7
7
 
8
- class ModelDeploymentClient(Client):
8
class AzureHosted(Client):
    """Client base for Azure-hosted models; rejects the ``web_search_options`` chat argument."""

    def chat(self, *user_prompt, **kwargs):
        """Delegate to the parent ``chat`` unless ``web_search_options`` was passed.

        :raises ValueError: if ``web_search_options`` is among *kwargs*
        """
        blocked_option = 'web_search_options'
        if blocked_option not in kwargs:
            return super().chat(*user_prompt, **kwargs)
        raise ValueError('Web search options not supported in any models of Azure AI Foundry')
13
+
14
+ class ModelDeploymentClient(AzureHosted):
9
15
  def __init__(self, key, deployment):
10
16
  self.client = AzureOpenAI(
11
17
  api_version="2024-12-01-preview", # mandatory
@@ -15,7 +21,7 @@ class ModelDeploymentClient(Client):
15
21
 
16
22
 
17
23
  @warnings.deprecated("Azure Open AI is deprecated. Please migrate to Azure AI Foundry")
18
- class OpenAIClient(Client):
24
+ class OpenAIClient(AzureHosted):
19
25
 
20
26
  def __init__(self, api_key, project):
21
27
  self.client = OpenAI(
@@ -0,0 +1,22 @@
1
+ from typing import Optional, Literal
2
+
3
+ from openai import OpenAI
4
+
5
+ from davidkhala.ai.openai import Client
6
+
7
+
8
class NativeClient(Client):
    """Client backed directly by an ``OpenAI`` SDK instance."""

    def __init__(self, api_key, base_url=None):
        """
        :param api_key: key forwarded to ``OpenAI``
        :param base_url: optional endpoint override forwarded to ``OpenAI``
        """
        self.client = OpenAI(api_key=api_key, base_url=base_url)

    def chat(self, *user_prompt, web_search: Optional[Literal["low", "medium", "high"]] = None, **kwargs):
        """Chat via the parent implementation, optionally adding web search options.

        :param web_search: when truthy, used as ``search_context_size`` of a
            ``WebSearchOptions`` passed on as ``web_search_options``
        :param kwargs: forwarded unchanged to the parent ``chat``
        """
        if not web_search:
            return super().chat(*user_prompt, **kwargs)
        # Imported lazily, only on the path that needs it.
        from openai.types.chat.completion_create_params import WebSearchOptions
        forwarded = dict(kwargs)
        forwarded['web_search_options'] = WebSearchOptions(search_context_size=web_search)
        return super().chat(*user_prompt, **forwarded)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "davidkhala.ai"
3
- version = "0.0.9"
3
+ version = "0.1.1"
4
4
  description = "misc AI modules"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.13"
@@ -10,6 +10,7 @@ langchain = [
10
10
  "langgraph", "langchain",
11
11
  "langchain-openai" # required by openRouter
12
12
  ]
13
+ ragflow = ["ragflow-sdk"]
13
14
  google = ["google-genai", "google-adk"]
14
15
  api = ["davidkhala.utils[http_request]"]
15
16
  hf = [
File without changes
@@ -1,12 +0,0 @@
1
- from openai import OpenAI
2
-
3
- from davidkhala.ai.openai import Client
4
-
5
-
6
- class NativeClient(Client):
7
- def __init__(self, api_key, base_url=None):
8
- self.client = OpenAI(
9
- api_key=api_key,
10
- base_url=base_url
11
- )
12
-
File without changes