davidkhala.ai 0.1.6__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/PKG-INFO +2 -3
  2. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/api/app.py +98 -0
  3. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/dify/api/knowledge.py +13 -14
  4. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/common.py +36 -0
  5. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/console/__init__.py +9 -0
  6. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/console/knowledge.py +156 -0
  7. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/console/session.py +30 -0
  8. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/__init__.py +17 -0
  9. davidkhala_ai-0.1.6/davidkhala/ai/agent/dify/ops/db/__init__.py → davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/app.py +16 -19
  10. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/knowledge.py +52 -0
  11. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/orm.py +151 -0
  12. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/sys.py +6 -0
  13. davidkhala_ai-0.1.6/davidkhala/ai/agent/dify/plugin.py → davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/plugins/__init__.py +4 -0
  14. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/plugins/file.py +19 -0
  15. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/plugins/firecrawl.py +22 -0
  16. davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/plugins/jina.py +4 -0
  17. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/openrouter/__init__.py +10 -2
  18. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/pyproject.toml +4 -6
  19. davidkhala_ai-0.1.6/davidkhala/ai/agent/dify/api/app.py +0 -38
  20. davidkhala_ai-0.1.6/davidkhala/ai/agent/dify/ops/db/orm.py +0 -50
  21. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/.gitignore +0 -0
  22. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/README.md +0 -0
  23. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/__init__.py +0 -0
  24. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/README.md +0 -0
  25. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/__init__.py +0 -0
  26. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/dify/__init__.py +0 -0
  27. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/dify/api/__init__.py +0 -0
  28. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/dify/ops/__init__.py +0 -0
  29. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/langgraph.py +0 -0
  30. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/ragflow.py +0 -0
  31. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/ali/__init__.py +0 -0
  32. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/ali/dashscope.py +0 -0
  33. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/api/__init__.py +0 -0
  34. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/api/openrouter.py +0 -0
  35. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/api/siliconflow.py +0 -0
  36. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/google/__init__.py +0 -0
  37. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/google/adk.py +0 -0
  38. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/google/gemini.py +0 -0
  39. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/huggingface/BAAI.py +0 -0
  40. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/huggingface/__init__.py +0 -0
  41. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/huggingface/inference.py +0 -0
  42. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/model.py +0 -0
  43. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/openai/__init__.py +0 -0
  44. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/openai/azure.py +0 -0
  45. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/openai/native.py +0 -0
  46. {davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/opik.py +0 -0
{davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: davidkhala.ai
-Version: 0.1.6
+Version: 0.1.7
 Summary: misc AI modules
 Requires-Python: >=3.13
 Provides-Extra: ali
@@ -23,10 +23,9 @@ Requires-Dist: onnx; extra == 'hf'
 Requires-Dist: onnxruntime; extra == 'hf'
 Provides-Extra: langchain
 Requires-Dist: langchain; extra == 'langchain'
-Requires-Dist: langchain-openai; extra == 'langchain'
+Requires-Dist: langchain-openai; (python_version < '3.14') and extra == 'langchain'
 Requires-Dist: langgraph; extra == 'langchain'
 Provides-Extra: openrouter
-Requires-Dist: davidkhala-utils[http-request]; extra == 'openrouter'
 Requires-Dist: openrouter; extra == 'openrouter'
 Provides-Extra: ragflow
 Requires-Dist: ragflow-sdk; extra == 'ragflow'
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/api/app.py
@@ -0,0 +1,98 @@
+import json
+from typing import TypedDict
+
+import requests
+from davidkhala.utils.http_request.stream import Request as StreamRequest, as_sse
+from requests import Response, Session
+
+from davidkhala.ai.agent.dify.api import API
+
+
+class Feedbacks(API):
+    def paginate_feedbacks(self, page=1, size=20):
+        """
+        When 'rating' == 'like', content is None.
+        When 'rating' == 'dislike', content can be filled in by the end user.
+        NOTE: for security reasons, the API cannot access the conversation context associated with a feedback entry. End users should copy the conversation into their comment themselves.
+        # waiting for https://github.com/langgenius/dify/issues/28067
+        """
+        response = requests.get(f"{self.base_url}/app/feedbacks", params={"page": page, "limit": size}, **self.options)
+        if not response.ok:
+            response.raise_for_status()
+        else:
+            return json.loads(response.text)
+
+    def list_feedbacks(self):
+        return self.paginate_feedbacks()['data']
+
+
+class Conversation(API):
+    """
+    Note: The Service API does not share conversations created by the WebApp; conversations created through the API are isolated from those created in the WebApp interface.
+    This means you cannot fetch end users' conversation content through the API; an API call only has access to conversations created via the API.
+    """
+
+    def __init__(self, api_key: str, user: str):
+        super().__init__(api_key)  # base_url needs to be configured afterward if not the default
+        self.user = user  # user_id, from_end_user_id
+
+    def paginate_messages(self, conversation_id):
+        return self.request(f"{self.base_url}/messages", "GET", params={
+            'conversation_id': conversation_id,
+            'user': self.user,
+        })
+
+    def _chat_request_from(self, template: str, stream, **kwargs):
+        """
+        :param template:
+        :param stream: Note: "Agent Chat App does not support blocking mode"
+        :param kwargs:
+        :return:
+        """
+        return {
+            'url': f"{self.base_url}/chat-messages",
+            'method': "POST",
+            'json': {
+                'query': template,
+                'inputs': kwargs.pop('values', {}),  # to substitute into the query/template
+                'response_mode': 'streaming' if stream else 'blocking',
+                'conversation_id': kwargs.pop('conversation_id', None),
+                'user': self.user,
+                'files': kwargs.pop('files', [])
+            },
+            **kwargs
+        }
+
+    def async_chat(self, template: str, **kwargs) -> tuple[Response, Session]:
+        s = StreamRequest(self)
+        s.session = Session()
+        return s.request(**self._chat_request_from(template, True, **kwargs)), s.session
+
+    class ChatResult(TypedDict, total=False):
+        thought: list[str]
+        metadata: dict
+
+    @staticmethod
+    def reduce_chat_stream(response: Response) -> ChatResult:
+        r: Conversation.ChatResult = {
+            'thought': [],
+        }
+        for data in as_sse(response):
+            match data['event']:
+                case 'agent_thought':
+                    r['thought'].append(data['thought'])
+                case 'message_end':
+                    r['metadata'] = data['metadata']
+        return r
+
+    def agent_chat(self, template: str, **kwargs) -> ChatResult:
+        r, session = self.async_chat(template, **kwargs)
+        reduced = Conversation.reduce_chat_stream(r)
+        session.close()
+        return reduced
+
+    def bot_chat(self, template: str, **kwargs):
+        r = self.request(**self._chat_request_from(template, False, **kwargs))
+        assert r.pop('event') == 'message'
+        assert r.pop('mode') == 'chat'
+        return r
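A minimal usage sketch for the new Conversation client (the API key, user id, and prompts below are placeholders; agent_chat drains the SSE stream that async_chat opens):

    from davidkhala.ai.agent.dify.api.app import Conversation

    c = Conversation(api_key="app-...", user="end-user-1")  # placeholder credentials
    # agent apps stream only; agent_chat reduces the SSE events into a ChatResult
    result = c.agent_chat("What is new in 0.1.7?", values={})
    print(result['thought'], result.get('metadata'))
    # plain chat apps also support blocking mode
    answer = c.bot_chat("hello")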
{davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/agent/dify/api/knowledge.py
@@ -8,7 +8,7 @@ from urllib.parse import urlparse
 import requests
 
 from davidkhala.ai.agent.dify.api import API, Iterator
-
+from davidkhala.ai.agent.dify.common import Document as DocumentBase
 
 class DatasetDict(TypedDict):
     id: str
@@ -37,21 +37,14 @@ class DatasetDict(TypedDict):
     external_knowledge_info: dict
 
 
-class DocumentDict(TypedDict):
-    id: str
-    position: int
-    data_source_type: str
+class Document(DocumentBase):
     data_source_info: dict[str, str]
     data_source_detail_dict: dict[str, dict]
     dataset_process_rule_id: str
-    name: str
     created_from: str
     created_by: str
     created_at: int
     tokens: int
-    indexing_status: str
-    error: str
-    enabled: bool
     archived: bool
     display_status: str
     word_count: int
@@ -91,9 +84,8 @@ class Dataset(API):
 
     def upload(self, filename, *, path=None, url=None, document_id=None):
         """
-        don't work for html
-        work for markdown
-        TODO how to simulate console
+        don't work for .html
+        work for .md
         """
         files = {}
         if path:
@@ -124,10 +116,10 @@ class Dataset(API):
             'limit': size
         })
 
-    def list_documents(self) -> Iterable[DocumentDict]:
+    def list_documents(self) -> Iterable[Document]:
         for document_batch in Iterator(self.paginate_documents, None):
             for document in document_batch:
-                yield document
+                yield Document(**document)
 
     def has_document(self, name) -> bool:
         return any(name == item['name'] for row in self.list_documents() for item in row)
@@ -189,3 +181,10 @@ class Document(API):
     def delete(self):
         if self.exist():
             self.request(self.base_url, "DELETE")
+class Chunk(API):
+    def __init__(self, d: Document, segment_id: str):
+        super().__init__(d.api_key, f"{d.base_url}/segments/{segment_id}")
+    def get(self):
+        r = self.request(self.base_url, "GET")
+        assert r['doc_form']  # optional value: text_model
+        return r['data']
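A short sketch of the revised listing flow: documents now come back as pydantic models rather than raw dicts, so fields are attribute-accessed. Construction details are elided (the signature lives in the API base class):

    from davidkhala.ai.agent.dify.api.knowledge import Dataset

    ds: Dataset = ...  # construct with your API key and dataset id
    for doc in ds.list_documents():
        # list_documents now yields pydantic Document models instead of plain dicts
        print(doc.name, doc.indexing_status)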
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/common.py
@@ -0,0 +1,36 @@
+from enum import Enum
+
+from pydantic import BaseModel
+
+from davidkhala.ai.agent.dify.plugins.firecrawl import DataSourceInfo
+
+
+class IndexingStatus(str, Enum):
+    WAITING = "waiting"
+    PARSING = "parsing"
+    SPLITTING = 'splitting'
+    INDEXING = "indexing"
+    COMPLETED = "completed"
+    FAILED = "error"
+
+
+class Document(BaseModel):
+    id: str
+    position: int
+    data_source_type: str
+    data_source_info: dict[str, str]
+    name: str
+    indexing_status: IndexingStatus
+    error: str | None
+    enabled: bool
+
+
+class Dataset(BaseModel):
+    id: str
+    name: str
+    description: str
+
+
+class IndexingError(Exception):
+    """Raised when document indexing fails (indexing_status = 'error')"""
+    pass
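Because IndexingStatus subclasses str, its members compare equal to the raw strings the Dify API returns, which is what the polling code in ops/console/knowledge.py relies on. A quick check:

    from davidkhala.ai.agent.dify.common import IndexingStatus

    assert IndexingStatus.FAILED == "error"  # str-enum members equal the raw API values
    assert "completed" == IndexingStatus.COMPLETED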
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/console/__init__.py
@@ -0,0 +1,9 @@
+from davidkhala.utils.http_request import Request
+
+
+class API(Request):
+    def __init__(self, base_url='http://localhost'):
+        super().__init__()
+        self.base_url = f"{base_url}/console/api"
+        self.__enter__()
+
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/console/knowledge.py
@@ -0,0 +1,156 @@
+from time import sleep
+
+from davidkhala.utils.http_request.stream import as_sse, Request as StreamRequest
+from pydantic import BaseModel
+
+from davidkhala.ai.agent.dify.common import IndexingStatus, IndexingError, Dataset, Document
+from davidkhala.ai.agent.dify.ops.console import API
+from davidkhala.ai.agent.dify.ops.console.session import ConsoleUser
+from davidkhala.ai.agent.dify.ops.db.orm import Node
+
+
+class ConsoleKnowledge(API):
+    def __init__(self, context: ConsoleUser):
+        super().__init__()
+        self.base_url = context.base_url
+        self.session.cookies = context.session.cookies
+        self.options = context.options
+
+
+class Datasource(ConsoleKnowledge):
+    """step 1: Choose a Data Source"""
+
+    class FirecrawlOutput(BaseModel):
+        source_url: str
+        description: str
+        title: str
+        credential_id: str
+        content: str
+
+    def run_firecrawl(self, pipeline: str, node: Node,
+                      *,
+                      inputs: dict,
+                      credential_id: str
+                      ):
+
+        url = f"{self.base_url}/rag/pipelines/{pipeline}/workflows/published/datasource/nodes/{node.id}/run"
+
+        stream_request = StreamRequest(self)
+        response = stream_request.request(url, 'POST', json={
+            'inputs': inputs,
+            'datasource_type': node.datasource_type,
+            'credential_id': credential_id,
+            "response_mode": "streaming"
+        })
+
+        for data in as_sse(response):
+            event = data['event']
+            if event == 'datasource_completed':
+                return data['data']
+            else:
+                assert event == 'datasource_processing'
+                print(data)
+        return None
+
+    def upload(self):
+        "http://localhost/console/api/files/upload?source=datasets"
+        # TODO
+        "form data"
+        {
+            "file": "body"
+        }
+        r = {  # sample response captured from the console
+            "id": "3898db5b-eb72-4f11-b507-628ad5d28887",
+            "name": "Professional Diploma Meister Power Electrical Engineering - Technological and Higher Education Institute of Hong Kong.html",
+            "size": 254362,
+            "extension": "html",
+            "mime_type": "text/html",
+            "created_by": "dbd0b38b-5ef1-4123-8c3f-0c82eb1feacd",
+            "created_at": 1764943811,
+            "source_url": "/files/3898db5b-eb72-4f11-b507-628ad5d28887/file-preview?timestamp=1764943811&nonce=43b0ff5a13372415be79de4cc7ef398c&sign=7OJ2wiVYc4tygl7yvM1sPn7s0WXDlhHxgX76bsGTD94%3D"
+        }
+
+
+class Operation(ConsoleKnowledge):
+    def website_sync(self, dataset: str, document: str, *, wait_until=True):
+        """
+        Cannot be used against a pipeline dataset; otherwise you will see the error "no website import info found".
+        """
+        doc_url = f"{self.base_url}/datasets/{dataset}/documents/{document}"
+
+        r = self.request(f"{doc_url}/website-sync", "GET")
+        assert r == {"result": "success"}
+        if wait_until:
+            return self.wait_until(dataset, document)
+        return None
+
+    def retry(self, dataset: str, *documents: str, wait_until=True):
+        """
+        It cannot trigger a rerun on successfully indexed documents.
+        """
+        url = f"{self.base_url}/datasets/{dataset}/retry"
+        self.request(url, "POST", json={
+            'document_ids': documents,
+        })
+        # response status code will be 204
+        if wait_until:
+            return [self.wait_until(dataset, document) for document in documents]
+        return None
+
+    def rerun(self, dataset: str, *documents: str):
+        for document in documents:
+            try:
+                self.website_sync(dataset, document)
+                assert False, "expect IndexingError"
+            except IndexingError:
+                pass
+        return self.retry(dataset, *documents)
+
+    def wait_until(self, dataset: str, document: str, *,
+                   expect_status=None,
+                   from_status=None,
+                   interval=1
+                   ):
+        if not expect_status:
+            expect_status = [IndexingStatus.FAILED, IndexingStatus.COMPLETED]
+        url = f"{self.base_url}/datasets/{dataset}/documents/{document}/indexing-status"
+        if from_status is None:
+            from_status = [IndexingStatus.WAITING, IndexingStatus.PARSING]
+        r = self.request(url, "GET")
+        status = r['indexing_status']
+        assert status in from_status, f"current status: {status}, expect: {from_status}"
+        while status not in expect_status:
+            sleep(interval)
+            r = self.request(url, "GET")
+            status = r['indexing_status']
+        if status == IndexingStatus.FAILED: raise IndexingError(r['error'])
+        return r
+
+
+class DatasetResult(Dataset):
+    chunk_structure: str
+
+class RunResult(BaseModel):
+    batch: str
+    dataset: DatasetResult
+    documents: list[Document]
+
+class Load(ConsoleKnowledge):
+    """
+    Processing Documents
+    """
+
+    def async_run(self, pipeline: str, node: Node, inputs: dict, datasource_info_list: list[dict]) -> RunResult:
+        """Ingest a new document"""
+        url = f"{self.base_url}/rag/pipelines/{pipeline}/workflows/published/run"
+        r = self.request(url, "POST", json={
+            'inputs': inputs,
+            'start_node_id': node.id,
+            'is_preview': False,
+            'response_mode': "blocking",
+            "datasource_info_list": datasource_info_list,
+            'datasource_type': node.datasource_type
+        })
+        return RunResult(**r)
+
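A hedged end-to-end sketch of driving a published RAG pipeline through these console wrappers (all ids and credentials are placeholders; note that wait_until asserts the document starts from a waiting/parsing status):

    from davidkhala.ai.agent.dify.ops.console.knowledge import Datasource, Load, Operation
    from davidkhala.ai.agent.dify.ops.console.session import ConsoleUser

    user = ConsoleUser()                    # defaults to http://localhost/console/api
    user.login("admin@example.com", "***")  # placeholder credentials
    node = ...  # a datasource Node parsed from the workflow graph (see ops/db/orm.py)
    info = Datasource(user).run_firecrawl("<pipeline-id>", node, inputs={}, credential_id="<credential-id>")
    run = Load(user).async_run("<pipeline-id>", node, inputs={}, datasource_info_list=[info])
    Operation(user).wait_until(run.dataset.id, run.documents[0].id)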
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/console/session.py
@@ -0,0 +1,30 @@
+from davidkhala.ai.agent.dify.ops.console import API
+
+
+class ConsoleUser(API):
+    def login(self, email, password,
+              *,
+              remember_me=True,
+              language="en-US"
+              ):
+        url = f"{self.base_url}/login"
+
+        r = self.request(url, "POST", json={
+            'email': email,
+            'password': password,
+            'remember_me': remember_me,
+            'language': language,
+        })
+        assert r == {"result": "success"}
+        self.options['headers']['x-csrf-token'] = self.session.cookies.get("csrf_token")
+        return self.session.cookies
+
+    @property
+    def me(self) -> dict:
+        url = f"{self.base_url}/account/profile"
+        return self.request(url, "GET")
+
+    @property
+    def workspace(self) -> dict:
+        url = f"{self.base_url}/features"
+        return self.request(url, "GET")
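After login, the client carries both the session cookies and the x-csrf-token header, so the other console endpoints can be called directly (placeholder host and credentials):

    from davidkhala.ai.agent.dify.ops.console.session import ConsoleUser

    user = ConsoleUser("http://localhost")  # base_url also defaults to http://localhost
    user.login("admin@example.com", "***")
    print(user.me['email'])   # GET /console/api/account/profile
    print(user.workspace)     # GET /console/api/features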
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/__init__.py
@@ -0,0 +1,17 @@
+from typing import Any
+
+from davidkhala.data.base.pg import Postgres
+
+
+class DB(Postgres):
+
+    def __init__(self, connection_string: str):
+        super().__init__(connection_string)
+        self.connect()
+
+    def get_dict(self,
+                 template: str,
+                 values: dict[str, Any] | None = None,
+                 request_options: dict[str, Any] | None = None
+                 ) -> list[dict]:
+        return Postgres.rows_to_dicts(self.query(template, values, request_options))
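get_dict now takes a parameterized template plus bound values instead of a raw SQL string. A sketch, assuming a local Dify Postgres (the DSN is a placeholder):

    from davidkhala.ai.agent.dify.ops.db import DB

    db = DB("postgresql://postgres:***@localhost:5432/dify")  # placeholder DSN
    rows = db.get_dict("select id, name from apps where mode = :mode", {"mode": "chat"})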
davidkhala_ai-0.1.6/davidkhala/ai/agent/dify/ops/db/__init__.py → davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/app.py
@@ -1,22 +1,19 @@
-from typing import Any, Optional
-
-from davidkhala.data.base.pg import Postgres
-from sqlalchemy import desc
-from sqlalchemy.orm import Session
-
+from davidkhala.ai.agent.dify.ops.db import DB
 from davidkhala.ai.agent.dify.ops.db.orm import AppModelConfig
+from sqlalchemy.orm import Session
+from sqlalchemy import desc
 
-
-class DB(Postgres):
-
-    def __init__(self, connection_string: str):
-        super().__init__(connection_string)
-        self.connect()
-
-    def get_dict(self, sql): return self.query(sql).mappings().all()
-
+class Studio(DB):
     @property
-    def accounts(self): return self.get_dict("select name, email from accounts where status = 'active'")
+    def user_feedbacks(self):
+        sql = """SELECT mf.conversation_id,
+                        mf.content,
+                        m.query,
+                        m.answer
+                 FROM message_feedbacks mf
+                          LEFT JOIN messages m ON mf.message_id = m.id
+                 WHERE mf.from_source = 'user'"""
+        return self.get_dict(sql)
 
     @property
     def apps(self): return self.get_dict("select id, name, mode from apps where status = 'normal'")
@@ -30,11 +27,11 @@ class DB(Postgres):
             .first()
         )
 
-    def update_app_config(self, record: AppModelConfig, refresh:bool=False) -> AppModelConfig | None:
+    def update_app_config(self, record: AppModelConfig, refresh: bool = False) -> AppModelConfig | None:
         with Session(self.client) as session:
             session.add(record)
             session.commit()
             if refresh:
-                session.refresh(record)  # refresh the object to pick up DB-generated fields (e.g. id)
+                session.refresh(record)
             return record
-        return None
+        return None
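A usage sketch for the renamed Studio helper (the DSN is a placeholder; user_feedbacks joins feedback entries with the originating messages):

    from davidkhala.ai.agent.dify.ops.db.app import Studio

    studio = Studio("postgresql://postgres:***@localhost:5432/dify")  # placeholder DSN
    for fb in studio.user_feedbacks:
        print(fb['conversation_id'], fb['query'], fb['content'])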
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/knowledge.py
@@ -0,0 +1,52 @@
+from davidkhala.ai.agent.dify.ops.db import DB
+from davidkhala.ai.agent.dify.ops.db.orm import Graph
+
+
+class Dataset(DB):
+
+    def dataset_queries(self, dataset_id, limit=20) -> list[str]:
+        template = "select content from dataset_queries where source = 'app' and created_by_role = 'end_user' and dataset_id = :dataset_id limit :limit"
+        return self.query(template, {'dataset_id': dataset_id, 'limit': limit}).scalars().all()
+
+    @property
+    def datasets(self):
+        template = "select id, name, description, indexing_technique, index_struct, embedding_model, embedding_model_provider, collection_binding_id, retrieval_model, icon_info, runtime_mode, pipeline_id, chunk_structure from datasets"
+        return self.get_dict(template)
+
+    def is_pipeline(self, id: str):
+        template = "select runtime_mode = 'rag_pipeline' from datasets where id = :id"
+        return self.query(template, {'id': id}).scalar()
+
+    @property
+    def data_source_credentials(self):
+        template = "select id, name, plugin_id, auth_type from datasource_providers"
+        return self.get_dict(template)
+
+    def credential_id_by(self, name, provider) -> list[str]:
+        template = "select id from datasource_providers where name = :name and provider = :provider"
+        return self.query(template, {'name': name, 'provider': provider}).scalars().all()
+
+
+class Document(DB):
+    def hit_documents(self, top_k: int = 3):
+        template = "SELECT dataset_id, document_id, content FROM document_segments ORDER BY hit_count DESC LIMIT :top_k"
+        return self.get_dict(template, {'top_k': top_k})
+
+    def id_by(self, name) -> list[str]:
+        """multiple ids can be found"""
+        template = "select id from documents where name = :name"
+        return [str(uuid) for uuid in self.query(template, {'name': name}).scalars().all()]
+
+
+class Pipeline(DB):
+    @property
+    def pipelines(self):
+        """DISTINCT ON is PostgreSQL-specific syntax"""
+        template = "SELECT DISTINCT ON (app_id) app_id, graph, rag_pipeline_variables FROM workflows where type = 'rag-pipeline' ORDER BY app_id, created_at DESC"
+        return Graph.convert(*self.get_dict(template))
+
+    def pipeline(self, app_id):
+        template = "select id, graph, rag_pipeline_variables from workflows where type = 'rag-pipeline' and app_id = :app_id"
+        dict_result = self.get_dict(template, {'app_id': app_id})
+        assert len(dict_result) < 2
+        return Graph.convert(*dict_result)
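A lookup sketch (the DSN and app id are placeholders; pipeline() returns at most one row, with the graph column already parsed into the Graph model):

    from davidkhala.ai.agent.dify.ops.db.knowledge import Pipeline

    p = Pipeline("postgresql://postgres:***@localhost:5432/dify")  # placeholder DSN
    records = p.pipeline("<app-id>")
    if records:
        graph = records[0]['graph']
        print([node.id for node in graph.datasources])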
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/orm.py
@@ -0,0 +1,151 @@
+import json
+from enum import Enum
+from typing import Any, Literal
+
+from pydantic import BaseModel
+from sqlalchemy import Column, String, Text, JSON, TIMESTAMP, func
+from sqlalchemy.dialects.postgresql import UUID
+from sqlalchemy.orm import declarative_base
+
+Base = declarative_base()
+
+
+class DifyBase(Base):
+    __abstract__ = True  # keyword for SQLAlchemy
+    id = Column(UUID(as_uuid=True), primary_key=True, server_default=func.uuid_generate_v4())
+
+
+class AppModelConfig(DifyBase):
+    __tablename__ = "app_model_configs"
+    __table_args__ = {"schema": "public"}
+
+    app_id = Column(UUID(as_uuid=True), nullable=False)
+
+    provider = Column(String(255))
+    model_id = Column(String(255))
+    configs = Column(JSON)
+
+    created_at = Column(TIMESTAMP, nullable=False, server_default=func.current_timestamp())
+    updated_at = Column(TIMESTAMP, nullable=False, server_default=func.current_timestamp())
+
+    opening_statement = Column(Text)
+    suggested_questions = Column(Text)
+    suggested_questions_after_answer = Column(Text)
+    more_like_this = Column(Text)
+    model = Column(Text)
+    user_input_form = Column(Text)
+    pre_prompt = Column(Text)
+    agent_mode = Column(Text)
+    speech_to_text = Column(Text)
+    sensitive_word_avoidance = Column(Text)
+    retriever_resource = Column(Text)
+
+    dataset_query_variable = Column(String(255))
+    prompt_type = Column(String(255), nullable=False, server_default="simple")
+
+    chat_prompt_config = Column(Text)
+    completion_prompt_config = Column(Text)
+    dataset_configs = Column(Text)
+    external_data_tools = Column(Text)
+    file_upload = Column(Text)
+    text_to_speech = Column(Text)
+
+    created_by = Column(UUID(as_uuid=True))
+    updated_by = Column(UUID(as_uuid=True))
+
+    def __repr__(self):
+        return f"<AppModelConfig(id={self.id}, app_id={self.app_id}, provider={self.provider}, model_id={self.model_id})>"
+
+
+class Position(BaseModel):
+    x: float
+    y: float
+
+
+class NodeData(BaseModel):
+    class Type(str, Enum):
+        SOURCE = 'datasource'
+        CHUNKER = 'knowledge-index'
+        TOOL = 'tool'
+
+    type: Type | str  # not limited to the built-in types
+    title: str | None = None
+    selected: bool
+
+    # datasource
+    datasource_parameters: dict[str, Any] | None = None
+    datasource_configurations: dict[str, Any] | None = None
+    plugin_id: str | None = None
+    provider_type: str | None = None
+    provider_name: str | None = None
+    datasource_name: str | None = None
+    datasource_label: str | None = None
+    plugin_unique_identifier: str | None = None
+
+    # tool
+    tool_parameters: dict[str, Any] | None = None
+    tool_configurations: dict[str, Any] | None = None
+    tool_node_version: str | None = None
+    provider_id: str | None = None
+    provider_icon: str | None = None
+    tool_name: str | None = None
+    tool_label: str | None = None
+    tool_description: str | None = None
+    is_team_authorization: bool | None = None
+    paramSchemas: list[Any] | None = None
+    params: dict[str, Any] | None = None
+
+    # knowledge index
+    index_chunk_variable_selector: list[str] | None = None
+    keyword_number: int | None = None
+    retrieval_model: dict[str, Any] | None = None
+    chunk_structure: str | None = None
+    indexing_technique: str | None = None
+    embedding_model: str | None = None
+    embedding_model_provider: str | None = None
+
+
+class Node(BaseModel):
+    @property
+    def datasource_type(self): return self.data.provider_type
+    id: str
+    type: Literal['custom']
+    data: NodeData
+    position: Position
+    targetPosition: str | None = None
+    sourcePosition: str | None = None
+    positionAbsolute: Position | None = None
+    width: float | None = None
+    height: float | None = None
+    selected: bool
+
+
+class Edge(BaseModel):
+    id: str
+    type: str
+    source: str
+    target: str
+    sourceHandle: str | None = None
+    targetHandle: str | None = None
+    data: dict[str, Any] | None = None
+    zIndex: int | None = None
+
+
+class Viewport(BaseModel):
+    x: float
+    y: float
+    zoom: float
+
+
+class Graph(BaseModel):
+    nodes: list[Node]
+    edges: list[Edge]
+    viewport: Viewport
+
+    @property
+    def datasources(self):
+        return [node for node in self.nodes if node.data.type == NodeData.Type.SOURCE]
+
+    @staticmethod
+    def convert(*records: list[dict]):
+        return [{**record, "graph": Graph(**json.loads(record["graph"]))} for record in records]
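A minimal round trip through the Graph model (the JSON is a trimmed, hypothetical workflows.graph value):

    import json
    from davidkhala.ai.agent.dify.ops.db.orm import Graph

    raw = '{"nodes": [], "edges": [], "viewport": {"x": 0, "y": 0, "zoom": 1}}'
    g = Graph(**json.loads(raw))
    assert g.datasources == []  # no datasource nodes in this trimmed graph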
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/ops/db/sys.py
@@ -0,0 +1,6 @@
+from davidkhala.ai.agent.dify.ops.db import DB
+
+
+class Info(DB):
+    @property
+    def accounts(self): return self.get_dict("select name, email from accounts where status = 'active'")
davidkhala_ai-0.1.6/davidkhala/ai/agent/dify/plugin.py → davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/plugins/__init__.py
@@ -1,3 +1,5 @@
+from typing import Literal
+
 from pydantic import BaseModel
 
 class JsonEntry(BaseModel):
@@ -8,3 +10,5 @@ class Output(BaseModel):
     text: str
     files: list
     json: list[JsonEntry]
+class DataSourceTypeAware(BaseModel):
+    datasource_type: Literal["local_file", "online_document", "website_crawl"]
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/plugins/file.py
@@ -0,0 +1,19 @@
+from pydantic import BaseModel
+
+from davidkhala.ai.agent.dify.plugins import DataSourceTypeAware
+
+
+class FileModel(BaseModel):
+    name: str
+    size: int
+    type: str
+    extension: str
+    mime_type: str
+    transfer_method: str
+    url: str
+    related_id: str
+
+
+class DataSourceOutput(DataSourceTypeAware):
+    datasource_type: str = "local_file"
+    file: FileModel
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/plugins/firecrawl.py
@@ -0,0 +1,22 @@
+from pydantic import BaseModel
+
+from davidkhala.ai.agent.dify.plugins import DataSourceTypeAware
+
+
+class DataSourceInfo(BaseModel):
+    source_url: str
+    content: str
+    title: str
+    description: str
+
+
+class DataSourceOutput(DataSourceTypeAware, DataSourceInfo):
+    datasource_type: str = "website_crawl"
+
+
+class CredentialAware(BaseModel):
+    credential_id: str | None
+
+
+class Console(DataSourceOutput, CredentialAware):
+    pass
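A construction sketch for the firecrawl output models (field values are placeholders; the subclass narrows datasource_type to a string defaulting to "website_crawl"):

    from davidkhala.ai.agent.dify.plugins.firecrawl import Console

    out = Console(
        source_url="https://example.com",
        content="...",
        title="Example",
        description="placeholder",
        credential_id=None,
    )
    assert out.datasource_type == "website_crawl"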
davidkhala_ai-0.1.7/davidkhala/ai/agent/dify/plugins/jina.py
@@ -0,0 +1,4 @@
+from davidkhala.ai.agent.dify.plugins.firecrawl import DataSourceOutput as FirecrawlDataSourceOutput
+
+class DataSourceOutput(FirecrawlDataSourceOutput):
+    """so far they are the same"""
{davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/davidkhala/ai/openrouter/__init__.py
@@ -7,7 +7,7 @@ from openrouter import OpenRouter
 class Client(AbstractClient):
     def __init__(self, api_key: str):
         self.api_key = api_key
-        self.client = OpenRouter(api_key=api_key)
+        self.client = OpenRouter(api_key)
 
     def chat(self, *user_prompt, **kwargs):
         r = self.client.chat.send(
@@ -20,8 +20,16 @@ class Client(AbstractClient):
         return [_.message.content for _ in r.choices]
     def connect(self):
         try:
-            self.client.api_keys.list()
+            self.client.models.list()
             return True
         except UnauthorizedResponseError:
             return False
 
+
+class Admin:
+    def __init__(self, provisioning_key: str):
+        self.provisioning_key = provisioning_key
+        self.client = OpenRouter(provisioning_key)
+    @property
+    def keys(self):
+        return self.client.api_keys.list().data
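A usage sketch of the 0.1.7 split (keys are placeholders): connect() now probes the models list, which an ordinary inference key can call, while API-key management moves to the new Admin, presumably because api_keys.list() requires a provisioning key:

    from davidkhala.ai.openrouter import Client, Admin

    client = Client("sk-or-v1-...")  # placeholder inference key
    if client.connect():             # auth check via client.models.list()
        print(client.chat("hello"))

    admin = Admin("sk-or-provisioning-...")  # placeholder provisioning key
    print(admin.keys)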
{davidkhala_ai-0.1.6 → davidkhala_ai-0.1.7}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "davidkhala.ai"
-version = "0.1.6"
+version = "0.1.7"
 description = "misc AI modules"
 readme = "README.md"
 requires-python = ">=3.13"
@@ -8,18 +8,16 @@ requires-python = ">=3.13"
 [project.optional-dependencies]
 langchain = [
     "langgraph", "langchain",
-    "langchain-openai" # required by openRouter
+    "langchain-openai ; python_version < '3.14'" # required by openRouter
 ]
 ragflow = ["ragflow-sdk"]
 google = ["google-genai", "google-adk"]
 api = ["davidkhala.utils[http_request]"]
 hf = [
     "huggingface_hub", 'hf_xet',
-    'onnxruntime','onnx' # for test only
-]
-openrouter = [
-    "openrouter", "davidkhala.utils[http_request]"
+    'onnxruntime', 'onnx' # for test only
 ]
+openrouter = ["openrouter"]
 ali = ["dashscope"]
 azure = ["openai"]
 telemetry = [
davidkhala_ai-0.1.6/davidkhala/ai/agent/dify/api/app.py
@@ -1,38 +0,0 @@
-import json
-
-import requests
-
-from davidkhala.ai.agent.dify.api import API
-
-
-class Feedbacks(API):
-    def paginate_feedbacks(self, page=1, size=20):
-        """
-        when 'rating'='like', content=None
-        when 'rating'='dislike', content can be filled by end user
-        NOTE: for security reason, api cannot access conversation context associated with the feedback. End user should copy the conversation to comment by themselves.
-        """
-        response = requests.get(f"{self.base_url}/app/feedbacks", params={"page": page, "limit": size}, **self.options)
-        if not response.ok:
-            response.raise_for_status()
-        else:
-            return json.loads(response.text)
-
-    def list_feedbacks(self):
-        # TODO https://github.com/langgenius/dify/issues/28067
-        return self.paginate_feedbacks()['data']
-
-class Conversation(API):
-    """
-    Note: The Service API does not share conversations created by the WebApp. Conversations created through the API are isolated from those created in the WebApp interface.
-    It means you cannot get user conversation content from API, API call has only access to conversation created by API
-    """
-    def __init__(self, api_key: str, user: str):
-        super().__init__(api_key)  # base_url need to be configured afterward if not default
-        self.user = user  # user_id, from_end_user_id
-
-    def paginate_messages(self, conversation_id):
-        return self.request(f"{self.base_url}/messages", "GET", params={
-            'conversation_id': conversation_id,
-            'user': self.user,
-        })
davidkhala_ai-0.1.6/davidkhala/ai/agent/dify/ops/db/orm.py
@@ -1,50 +0,0 @@
-from sqlalchemy import (
-    Column, String, Text, JSON, TIMESTAMP,
-    func
-)
-from sqlalchemy.dialects.postgresql import UUID
-from sqlalchemy.orm import declarative_base
-
-Base = declarative_base()
-
-class AppModelConfig(Base):
-    __tablename__ = "app_model_configs"
-    __table_args__ = {"schema": "public"}
-
-    id = Column(UUID(as_uuid=True), primary_key=True, server_default=func.uuid_generate_v4())
-    app_id = Column(UUID(as_uuid=True), nullable=False)
-
-    provider = Column(String(255))
-    model_id = Column(String(255))
-    configs = Column(JSON)
-
-    created_at = Column(TIMESTAMP, nullable=False, server_default=func.current_timestamp())
-    updated_at = Column(TIMESTAMP, nullable=False, server_default=func.current_timestamp())
-
-    opening_statement = Column(Text)
-    suggested_questions = Column(Text)
-    suggested_questions_after_answer = Column(Text)
-    more_like_this = Column(Text)
-    model = Column(Text)
-    user_input_form = Column(Text)
-    pre_prompt = Column(Text)
-    agent_mode = Column(Text)
-    speech_to_text = Column(Text)
-    sensitive_word_avoidance = Column(Text)
-    retriever_resource = Column(Text)
-
-    dataset_query_variable = Column(String(255))
-    prompt_type = Column(String(255), nullable=False, server_default="simple")
-
-    chat_prompt_config = Column(Text)
-    completion_prompt_config = Column(Text)
-    dataset_configs = Column(Text)
-    external_data_tools = Column(Text)
-    file_upload = Column(Text)
-    text_to_speech = Column(Text)
-
-    created_by = Column(UUID(as_uuid=True))
-    updated_by = Column(UUID(as_uuid=True))
-
-    def __repr__(self):
-        return f"<AppModelConfig(id={self.id}, app_id={self.app_id}, provider={self.provider}, model_id={self.model_id})>"