davidkhala.ai 0.2.1__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/PKG-INFO +8 -6
  2. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/api/__init__.py +2 -2
  3. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/api/app.py +10 -6
  4. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/api/knowledge/chunk.py +14 -0
  5. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/api/knowledge/dataset.py +82 -0
  6. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/api/knowledge/document.py +42 -0
  7. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/api/knowledge/model.py +139 -0
  8. {davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify}/console/__init__.py +7 -1
  9. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/console/knowledge/dataset.py +61 -0
  10. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/console/knowledge/pipeline.py +127 -0
  11. {davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify}/console/plugin.py +20 -6
  12. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/console/session.py +50 -0
  13. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/db/orm.py +65 -0
  14. davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/model/__init__.py +7 -0
  15. davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/model.py → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/model/knowledge.py +1 -12
  16. davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops/db/orm.py → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/model/workflow.py +24 -62
  17. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/langgraph.py +1 -1
  18. davidkhala_ai-0.2.2/davidkhala/ai/ali/__init__.py +0 -0
  19. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/ali/dashscope.py +15 -14
  20. davidkhala_ai-0.2.2/davidkhala/ai/anthropic/__init__.py +6 -0
  21. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/api/__init__.py +6 -19
  22. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/api/openrouter.py +14 -10
  23. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/api/siliconflow.py +2 -4
  24. davidkhala_ai-0.2.2/davidkhala/ai/atlas/__init__.py +24 -0
  25. davidkhala_ai-0.2.2/davidkhala/ai/google/__init__.py +0 -0
  26. davidkhala_ai-0.2.2/davidkhala/ai/mistral/__init__.py +15 -0
  27. davidkhala_ai-0.2.2/davidkhala/ai/mistral/agent.py +50 -0
  28. davidkhala_ai-0.2.2/davidkhala/ai/mistral/ai.py +40 -0
  29. davidkhala_ai-0.2.2/davidkhala/ai/mistral/file.py +38 -0
  30. davidkhala_ai-0.2.2/davidkhala/ai/mistral/ocr.py +46 -0
  31. davidkhala_ai-0.2.2/davidkhala/ai/model/__init__.py +28 -0
  32. davidkhala_ai-0.2.2/davidkhala/ai/model/chat.py +75 -0
  33. davidkhala_ai-0.2.2/davidkhala/ai/model/embed.py +8 -0
  34. davidkhala_ai-0.2.2/davidkhala/ai/model/garden.py +9 -0
  35. davidkhala_ai-0.2.2/davidkhala/ai/openai/__init__.py +48 -0
  36. davidkhala_ai-0.2.2/davidkhala/ai/openai/azure.py +84 -0
  37. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/openai/native.py +2 -3
  38. davidkhala_ai-0.2.2/davidkhala/ai/openrouter/__init__.py +47 -0
  39. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/pyproject.toml +5 -7
  40. davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/api/knowledge.py +0 -191
  41. davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops/__init__.py +0 -1
  42. davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops/console/knowledge.py +0 -158
  43. davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops/console/session.py +0 -32
  44. davidkhala_ai-0.2.1/davidkhala/ai/huggingface/BAAI.py +0 -10
  45. davidkhala_ai-0.2.1/davidkhala/ai/huggingface/__init__.py +0 -21
  46. davidkhala_ai-0.2.1/davidkhala/ai/huggingface/inference.py +0 -13
  47. davidkhala_ai-0.2.1/davidkhala/ai/mistral/__init__.py +0 -33
  48. davidkhala_ai-0.2.1/davidkhala/ai/model/__init__.py +0 -44
  49. davidkhala_ai-0.2.1/davidkhala/ai/model/chat.py +0 -19
  50. davidkhala_ai-0.2.1/davidkhala/ai/openai/__init__.py +0 -72
  51. davidkhala_ai-0.2.1/davidkhala/ai/openai/azure.py +0 -33
  52. davidkhala_ai-0.2.1/davidkhala/ai/openrouter/__init__.py +0 -36
  53. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/.gitignore +0 -0
  54. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/README.md +0 -0
  55. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/__init__.py +0 -0
  56. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/README.md +0 -0
  57. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/__init__.py +0 -0
  58. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/__init__.py +0 -0
  59. {davidkhala_ai-0.2.1/davidkhala/ai/ali → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/api/knowledge}/__init__.py +0 -0
  60. {davidkhala_ai-0.2.1/davidkhala/ai/google → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify/console/knowledge}/__init__.py +0 -0
  61. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/const.py +0 -0
  62. {davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify}/db/__init__.py +0 -0
  63. {davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify}/db/app.py +0 -0
  64. {davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify}/db/knowledge.py +0 -0
  65. {davidkhala_ai-0.2.1/davidkhala/ai/agent/dify/ops → davidkhala_ai-0.2.2/davidkhala/ai/agent/dify}/db/sys.py +0 -0
  66. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/interface.py +0 -0
  67. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/plugins/__init__.py +0 -0
  68. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/plugins/file.py +0 -0
  69. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/plugins/firecrawl.py +0 -0
  70. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/plugins/jina.py +0 -0
  71. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/dify/plugins/popular.py +0 -0
  72. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/agent/ragflow.py +0 -0
  73. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/ali/agentbay.py +0 -0
  74. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/google/adk.py +0 -0
  75. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/google/gemini.py +0 -0
  76. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/openai/databricks.py +0 -0
  77. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/openai/opik.py +0 -0
  78. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/opik.py +0 -0
  79. {davidkhala_ai-0.2.1 → davidkhala_ai-0.2.2}/davidkhala/ai/you.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: davidkhala.ai
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: misc AI modules
5
5
  Requires-Python: >=3.12
6
6
  Provides-Extra: ali
@@ -9,7 +9,10 @@ Requires-Dist: davidkhala-utils; extra == 'ali'
9
9
  Requires-Dist: wuying-agentbay-sdk; extra == 'ali'
10
10
  Provides-Extra: api
11
11
  Requires-Dist: davidkhala-utils[http-request]; extra == 'api'
12
+ Provides-Extra: atlas
13
+ Requires-Dist: voyageai; extra == 'atlas'
12
14
  Provides-Extra: azure
15
+ Requires-Dist: davidkhala-ml-ocr; extra == 'azure'
13
16
  Requires-Dist: davidkhala-utils; extra == 'azure'
14
17
  Requires-Dist: openai; extra == 'azure'
15
18
  Provides-Extra: dify
@@ -19,16 +22,15 @@ Requires-Dist: dify-plugin; extra == 'dify'
19
22
  Provides-Extra: google
20
23
  Requires-Dist: google-adk; extra == 'google'
21
24
  Requires-Dist: google-genai; extra == 'google'
22
- Provides-Extra: hf
23
- Requires-Dist: hf-xet; extra == 'hf'
24
- Requires-Dist: huggingface-hub; extra == 'hf'
25
- Requires-Dist: onnx; extra == 'hf'
26
- Requires-Dist: onnxruntime; extra == 'hf'
27
25
  Provides-Extra: langchain
28
26
  Requires-Dist: langchain; extra == 'langchain'
29
27
  Requires-Dist: langchain-openai; (python_version < '3.14') and extra == 'langchain'
30
28
  Requires-Dist: langgraph; extra == 'langchain'
29
+ Provides-Extra: minimax
30
+ Requires-Dist: anthropic; extra == 'minimax'
31
+ Requires-Dist: openai; extra == 'minimax'
31
32
  Provides-Extra: mistral
33
+ Requires-Dist: davidkhala-ml-ocr; extra == 'mistral'
32
34
  Requires-Dist: mistralai; extra == 'mistral'
33
35
  Provides-Extra: openrouter
34
36
  Requires-Dist: openrouter; extra == 'openrouter'
@@ -1,4 +1,4 @@
1
- from typing import Iterable, Callable, Any, Optional
1
+ from typing import Iterable, Callable, Any
2
2
 
3
3
  from davidkhala.utils.http_request import Request
4
4
 
@@ -14,7 +14,7 @@ class Iterator(Iterable):
14
14
  def __iter__(self):
15
15
  return self
16
16
 
17
- def __init__(self, get_fn: Callable[[int, int], Any], r: Optional[dict]):
17
+ def __init__(self, get_fn: Callable[[int, int], Any], r: dict|None):
18
18
  self.response = r
19
19
  self.fn = get_fn
20
20
 
@@ -77,12 +77,16 @@ class Conversation(API):
77
77
  r: Conversation.ChatResult = {
78
78
  'thought': [],
79
79
  }
80
- for data in as_sse(response):
81
- match data['event']:
82
- case 'agent_thought':
83
- r['thought'].append(data['thought'])
84
- case 'message_end':
85
- r['metadata'] = data['metadata']
80
+
81
+ for line in response.iter_lines():
82
+ if line and line!=b'event: ping':
83
+ data = json.loads(line[5:].decode())
84
+ match data['event']:
85
+ case 'agent_thought':
86
+ r['thought'].append(data['thought'])
87
+ case 'message_end':
88
+ r['metadata'] = data['metadata']
89
+
86
90
  return r
87
91
 
88
92
  def agent_chat(self, template: str, **kwargs) -> ChatResult:
@@ -0,0 +1,14 @@
1
+ from __future__ import annotations
2
+
3
+ from davidkhala.ai.agent.dify.api import API
4
+ from davidkhala.ai.agent.dify.api.knowledge.model import DocumentModel
5
+
6
+
7
class Chunk(API):
    """Client bound to one segment (chunk) URL nested under a document URL."""

    def __init__(self, d: DocumentModel, segment_id: str):
        # NOTE(review): `d` is annotated as DocumentModel, yet only `api_key` and
        # `base_url` are read from it -- confirm the intended argument type.
        segment_url = f"{d.base_url}/segments/{segment_id}"
        super().__init__(d.api_key, segment_url)

    def get(self):
        """GET the segment and return the ``data`` entry of the response."""
        response = self.request(self.base_url, "GET")
        assert response['doc_form']  # optional value text_model
        return response['data']
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+ from typing import Iterable
6
+ from urllib.parse import urlparse
7
+
8
+ import requests
9
+
10
+ from davidkhala.ai.agent.dify.api import API, Iterator
11
+ from davidkhala.ai.agent.dify.api.knowledge.model import DatasetModel, DocumentModel
12
+
13
+
14
class Dataset(API):
    """Client for the ``/datasets`` collection of the Dify knowledge API."""

    def __init__(self, api_key: str, base_url="https://api.dify.ai/v1"):
        super().__init__(api_key, f"{base_url}/datasets")

    def paginate_datasets(self, page=1, size=20):
        """Fetch one page of datasets; ``size`` is sent as the ``limit`` query parameter."""
        return self.request(self.base_url, "GET", params={
            'page': page,
            'limit': size,
        })

    def list_datasets(self) -> Iterable[list[DatasetModel]]:
        """Return an iterator that walks ``paginate_datasets`` page by page."""
        return Iterator(self.paginate_datasets, None)

    @property
    def ids(self):
        """Yield the ``id`` of every dataset across all pages."""
        for sub_list in self.list_datasets():
            for dataset in sub_list:
                yield dataset['id']

    class Instance(API):
        """Client bound to a single dataset, addressed as ``{datasets_url}/{dataset_id}``."""

        def __init__(self, d: Dataset, dataset_id: str):
            super().__init__(d.api_key, f"{d.base_url}/{dataset_id}")

        def get(self) -> DatasetModel:
            """Fetch the dataset and validate the response into a ``DatasetModel``."""
            d = self.request(self.base_url, "GET")
            return DatasetModel.model_validate(d)

        def upload(self, filename, *, path=None, url=None, document_id=None):
            """
            Create a document from a file, or update ``document_id`` when it is given.

            The file content is read from the local ``path`` or, failing that,
            downloaded from ``url``. A falsy ``filename`` falls back to the base
            name of ``path`` or of the URL path.

            Does not work for .html; works for .md.

            :raises ValueError: when neither ``path`` nor ``url`` is provided
            :return: the ``document`` entry of the API response
            """
            if path:
                with open(path, 'rb') as f:
                    content = f.read()
                if not filename:
                    filename = os.path.basename(path)
            elif url:
                r = requests.get(url)
                r.raise_for_status()
                if not filename:
                    filename = Path(urlparse(url).path).name
                content = r.content
            else:
                # Previously this fell through and failed later on an unbound `content`.
                raise ValueError("either `path` or `url` is required")

            files = {'file': (filename, content)}
            if document_id:
                # does not work for html
                endpoint = f"{self.base_url}/documents/{document_id}/update-by-file"
            else:
                endpoint = f"{self.base_url}/document/create-by-file"
            r = requests.post(endpoint, files=files, **self.options)
            r = self.on_response(r)
            return r['document']

        def paginate_documents(self, page=1, size=20):
            """Fetch one page of this dataset's documents."""
            return self.request(f"{self.base_url}/documents", "GET", params={
                'page': page,
                'limit': size
            })

        def list_documents(self) -> Iterable[DocumentModel]:
            """Yield every document of the dataset as a ``DocumentModel``."""
            for document_batch in Iterator(self.paginate_documents, None):
                for document in document_batch:
                    yield DocumentModel(**document)

        def has_document(self, name) -> bool:
            """
            Return True when any document of the dataset is named ``name``.

            Works on the raw page payloads: ``list_documents`` yields model
            instances, which cannot be iterated into items supporting ``['name']``.
            """
            return any(
                name == document['name']
                for document_batch in Iterator(self.paginate_documents, None)
                for document in document_batch
            )
@@ -0,0 +1,42 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Iterable
4
+
5
+ import requests
6
+
7
+ from davidkhala.ai.agent.dify.api import API, Iterator
8
+ from davidkhala.ai.agent.dify.api.knowledge.dataset import Dataset
9
+ from davidkhala.ai.agent.dify.api.knowledge.model import ChunkDict
10
+
11
+
12
class Document(API):
    """Client bound to one document, addressed as ``{dataset_url}/documents/{document_id}``."""

    def __init__(self, d: Dataset.Instance, document_id: str):
        document_url = f"{d.base_url}/documents/{document_id}"
        super().__init__(d.api_key, document_url)

    def exist(self):
        """Return False when fetching the document answers 404; other HTTP errors propagate."""
        try:
            self.get()
        except requests.exceptions.HTTPError as e:
            if e.response.status_code != 404:
                raise
            return False
        return True

    def get(self):
        """GET the document resource."""
        return self.request(self.base_url, "GET")

    def paginate_chunks(self, page=1, size=20):
        """Fetch one page of the document's segments."""
        query = {
            'page': page,
            'limit': size
        }
        return self.request(f"{self.base_url}/segments", "GET", params=query)

    def list_chunks(self) -> Iterable[ChunkDict]:
        """Yield every chunk of the document, page after page."""
        for chunk_batch in Iterator(self.paginate_chunks, None):
            yield from chunk_batch

    def delete(self):
        """Delete the document; a no-op when it does not exist."""
        if not self.exist():
            return
        self.request(self.base_url, "DELETE")
@@ -0,0 +1,139 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import List, Any, TypedDict
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from davidkhala.ai.agent.dify.model.knowledge import Document as DocumentBase
8
+
9
+
10
# Provider/model name pair of a reranking model; both values may be null.
class RerankingModel(BaseModel):
    reranking_provider_name: str | None
    reranking_model_name: str | None
13
+
14
+
15
# Keyword part of `Weights`: a single float weight.
class KeywordSetting(BaseModel):
    keyword_weight: float
17
+
18
+
19
# Vector part of `Weights`: a float weight plus the embedding model/provider names.
class VectorSetting(BaseModel):
    vector_weight: float
    embedding_model_name: str
    embedding_provider_name: str
23
+
24
+
25
# Weight configuration referenced by `RetrievalModelDict.weights`.
# All three fields are required keys whose value may be null.
class Weights(BaseModel):
    weight_type: str | None
    keyword_setting: KeywordSetting | None
    vector_setting: VectorSetting | None
29
+
30
+
31
# Retrieval settings of a dataset (see `DatasetModel.retrieval_model_dict`).
# Fields typed `X | None` have no default, so the key must be present even if null.
class RetrievalModelDict(BaseModel):
    search_method: str
    reranking_enable: bool
    reranking_mode: str | None
    reranking_model: RerankingModel | None
    weights: Weights | None
    top_k: int | None
    score_threshold_enabled: bool
    score_threshold: float
40
+
41
+
42
# External knowledge identifiers of a dataset (see `DatasetModel.external_knowledge_info`).
# Every key is required but may be null.
class ExternalKnowledgeInfo(BaseModel):
    external_knowledge_id: str | None
    external_knowledge_api_id: str | None
    external_knowledge_api_name: str | None
    external_knowledge_api_endpoint: str | None
47
+
48
+
49
# Retrieval settings for an external dataset (see `DatasetModel.external_retrieval_model`).
class ExternalRetrievalModel(BaseModel):
    top_k: int
    score_threshold: float
    score_threshold_enabled: bool
53
+
54
+
55
# Icon description of a dataset (see `DatasetModel.icon_info`).
# Only `icon_type` must be non-null.
class IconInfo(BaseModel):
    icon_type: str
    icon: str | None
    icon_background: str | None
    icon_url: str | None
60
+
61
+
62
# Validated shape of a dataset payload; `Dataset.Instance.get()` builds it with
# `model_validate`. Fields declared with `= None` / `= []` are optional in the
# payload; all other fields are required.
class DatasetModel(BaseModel):
    id: str
    name: str
    description: str | None = None
    provider: str
    permission: str
    data_source_type: str | None = None
    indexing_technique: str | None = None
    app_count: int
    document_count: int
    word_count: int
    created_by: str | None = None
    author_name: str | None = None
    created_at: int | None = None  # presumably a unix timestamp -- TODO confirm
    updated_by: str | None = None
    updated_at: int | None = None  # presumably a unix timestamp -- TODO confirm
    embedding_model: str | None = None
    embedding_model_provider: str | None = None
    embedding_available: bool
    retrieval_model_dict: RetrievalModelDict | None = None
    tags: List[Any] = []
    doc_form: str | None = None
    external_knowledge_info: ExternalKnowledgeInfo | None = None
    external_retrieval_model: ExternalRetrievalModel | None = None
    doc_metadata: List[Any] = []
    built_in_field_enabled: bool
    pipeline_id: str | None = None
    runtime_mode: str | None = None
    chunk_structure: str | None = None
    icon_info: IconInfo | None = None
    is_published: bool
    total_documents: int | None = None
    total_available_documents: int | None = None
    enable_api: bool
    is_multimodal: bool
97
+
98
+
99
# Document payload: extends the shared `Document` base model with additional
# fields. Every field declared here is required and non-nullable.
class DocumentModel(DocumentBase):
    data_source_info: dict[str, str]
    data_source_detail_dict: dict[str, dict]
    dataset_process_rule_id: str
    created_from: str
    created_by: str
    created_at: int
    tokens: int
    archived: bool
    display_status: str
    word_count: int
    hit_count: int
    doc_form: str
    doc_metadata: dict
    # NOTE(review): declared non-nullable; validation fails if the API sends
    # null for these on documents that were never disabled -- confirm.
    disabled_at: int
    disabled_by: str
115
+
116
+
117
# Dictionary shape of one chunk (segment), as yielded by `Document.list_chunks`.
class ChunkDict(TypedDict):
    id: str
    position: int
    document_id: str
    content: str
    sign_content: str  # trimmed version of content
    answer: str | None  # only used in QA chunk
    word_count: int
    tokens: int
    keywords: list[str] | None
    index_node_id: str  # node ID of the chunk in the vector index
    index_node_hash: str  # hash of sign_content
    hit_count: int
    enabled: bool
    status: str  # 'completed'
    created_at: int  # timestamp
    updated_at: int  # timestamp
    completed_at: int  # timestamp
    created_by: str  # user id
    child_chunks: list
    error: Any | None
    stopped_at: int | None  # timestamp
    disabled_at: int | None  # timestamp
@@ -1,9 +1,15 @@
1
+ from enum import Enum
2
+
1
3
  from davidkhala.utils.http_request import Request
2
4
 
3
5
 
4
6
  class API(Request):
7
+
5
8
  def __init__(self, base_url='http://localhost'):
9
+ """
10
+ :param base_url: "{protocol}://{host}". For Dify cloud, it is 'https://cloud.dify.ai'
11
+ """
6
12
  super().__init__()
7
13
  self.base_url = f"{base_url}/console/api"
8
- self.__enter__()
14
+ self.open()
9
15
 
@@ -0,0 +1,61 @@
1
+ from time import sleep
2
+
3
+ from davidkhala.ai.agent.dify.console.session import ConsoleDerived
4
+ from davidkhala.ai.agent.dify.const import IndexingStatus
5
+ from davidkhala.ai.agent.dify.interface import IndexingError
6
+
7
+
8
class Operation(ConsoleDerived):
    """Console operations that (re)trigger indexing of dataset documents."""

    def website_sync(self, dataset: str, document: str, *, wait_until=True):
        """
        cannot be used towards a pipeline dataset. Otherwise, you will see error "no website import info found"

        :return: the final indexing-status response when ``wait_until`` is truthy, else None
        """
        sync_url = f"{self.base_url}/datasets/{dataset}/documents/{document}/website-sync"
        result = self.request(sync_url, "GET")
        assert result == {"result": "success"}
        return self.wait_until(dataset, document) if wait_until else None

    def retry(self, dataset: str, *documents: str, wait_until=True):
        """
        It cannot trigger rerun on success documents

        :return: one indexing-status response per document when ``wait_until`` is truthy, else None
        """
        payload = {
            'document_ids': documents,
        }
        # response status code will be 204
        self.request(f"{self.base_url}/datasets/{dataset}/retry", "POST", json=payload)
        if not wait_until:
            return None
        return [self.wait_until(dataset, document) for document in documents]

    def rerun(self, dataset: str, *documents: str):
        """Sync every document, expecting each sync to end in IndexingError, then retry them all."""
        for document in documents:
            try:
                self.website_sync(dataset, document)
            except IndexingError:
                pass
            else:
                assert False, "expect IndexingError"
        return self.retry(dataset, *documents)

    def wait_until(self, dataset: str, document: str, *,
                   expect_status=None,
                   from_status=None,
                   interval=1
                   ):
        """
        Poll the document's indexing status every ``interval`` seconds until it is in ``expect_status``.

        The first observed status must be one of ``from_status``.

        :raises IndexingError: when the final status is FAILED
        :return: the last indexing-status response
        """
        expect_status = expect_status or [IndexingStatus.FAILED, IndexingStatus.COMPLETED]
        status_url = f"{self.base_url}/datasets/{dataset}/documents/{document}/indexing-status"
        if from_status is None:
            from_status = [IndexingStatus.WAITING, IndexingStatus.PARSING]

        r = self.request(status_url, "GET")
        status = r['indexing_status']
        assert status in from_status, f"current status: {status}, expect: {from_status}"
        while status not in expect_status:
            sleep(interval)
            r = self.request(status_url, "GET")
            status = r['indexing_status']
        if status == IndexingStatus.FAILED:
            raise IndexingError(r['error'])
        return r
@@ -0,0 +1,127 @@
1
+ from typing import Any
2
+
3
+ from davidkhala.utils.http_request.stream import Request as StreamRequest, as_sse
4
+ from pydantic import BaseModel, Field
5
+
6
+ from davidkhala.ai.agent.dify.console.session import ConsoleDerived
7
+ from davidkhala.ai.agent.dify.model import User
8
+ from davidkhala.ai.agent.dify.model.knowledge import Dataset, Document
9
+ from davidkhala.ai.agent.dify.model.workflow import NodeProtocol, Graph
10
+
11
+
12
# One entry of `PipelineModel.rag_pipeline_variables`.
# `label`, `variable`, `type` and `belong_to_node_id` are required; the rest default.
class RAGPipelineVariable(BaseModel):
    label: str
    variable: str
    type: str
    belong_to_node_id: str
    max_length: int | None = None
    required: bool = False
    unit: str | None = None
    default_value: Any | None = None
    options: list[Any] = Field(default_factory=list)
    placeholder: str | None = None
    tooltips: str | None = None
    allowed_file_types: str | None = None
    allow_file_extension: str | None = None
    allow_file_upload_methods: str | None = None
27
+
28
+
29
# Validated shape of the response that `Pipeline.get()` passes to `model_validate`.
class PipelineModel(BaseModel):
    id: str
    graph: Graph
    features: dict[str, Any] = Field(default_factory=dict)
    hash: str
    version: str
    marked_name: str = ""
    marked_comment: str = ""
    created_by: User
    created_at: int
    updated_by: User | None = None
    updated_at: int
    tool_published: bool = False
    environment_variables: list[dict[str, Any]]
    conversation_variables: list[dict[str, Any]]
    rag_pipeline_variables: list[RAGPipelineVariable]
45
+
46
+
47
# `Dataset` model extended with a required `chunk_structure`; used by `RunResult.dataset`.
class DatasetResult(Dataset):
    chunk_structure: str
49
+
50
+
51
# Validated shape of the response returned by `Pipeline.async_run`.
class RunResult(BaseModel):
    batch: str
    dataset: DatasetResult
    documents: list[Document]
55
+
56
+
57
class Pipeline(ConsoleDerived):
    """Console client for the published workflow of a RAG pipeline."""

    def async_run(self, pipeline: str, node: NodeProtocol, inputs: dict, datasource_info_list: list[dict]) -> RunResult:
        """Ingest new document"""
        payload = {
            'inputs': inputs,
            'start_node_id': node.id,
            'is_preview': False,
            'response_mode': "blocking",
            "datasource_info_list": datasource_info_list,
            'datasource_type': node.datasource_type
        }
        run_url = f"{self.base_url}/rag/pipelines/{pipeline}/workflows/published/run"
        response = self.request(run_url, "POST", json=payload)
        return RunResult.model_validate(response)

    def get(self, pipeline: str):
        """Fetch the pipeline's workflow from the ``workflows/publish`` endpoint as a ``PipelineModel``."""
        publish_url = f"{self.base_url}/rag/pipelines/{pipeline}/workflows/publish"
        return PipelineModel.model_validate(self.request(publish_url, "GET"))
76
+
77
+
78
class Datasource(ConsoleDerived):
    """Console client for running datasource nodes of a published RAG pipeline."""

    class FirecrawlOutput(BaseModel):
        source_url: str
        description: str
        title: str
        credential_id: str
        content: str

    def run_firecrawl(self, pipeline: str, node: NodeProtocol,
                      *,
                      inputs: dict,
                      credential_id: str
                      ):
        """
        Run a datasource node in streaming mode and wait for its completion event.

        Every ``datasource_processing`` event is printed; any other event except
        ``datasource_completed`` trips an assertion.

        :return: the ``data`` of the ``datasource_completed`` event, or None when
                 the stream ends without one
        """
        url = f"{self.base_url}/rag/pipelines/{pipeline}/workflows/published/datasource/nodes/{node.id}/run"

        stream_request = StreamRequest(self)
        response = stream_request.request(url, 'POST', json={
            'inputs': inputs,
            'datasource_type': node.datasource_type,
            'credential_id': credential_id,
            "response_mode": "streaming"
        })

        for data in as_sse(response):
            event = data['event']
            if event == 'datasource_completed':
                return data['data']
            assert event == 'datasource_processing'
            print(data)
        return None

    def upload(self):
        """Not implemented yet; currently does nothing and returns None."""
        # TODO: POST form data {"file": <body>} to
        #   http://localhost/console/api/files/upload?source=datasets
        # A captured response had these keys:
        #   id, name, size, extension, mime_type, created_by, created_at, source_url
        return None
@@ -1,15 +1,12 @@
1
1
  from time import sleep
2
2
 
3
- from davidkhala.ai.agent.dify.ops.console import API
4
- from davidkhala.ai.agent.dify.ops.console.session import ConsoleUser
3
+ from davidkhala.ai.agent.dify.console.session import ConsoleUser, ConsoleDerived
5
4
 
6
5
 
7
- class ConsolePlugin(API):
6
+ class ConsolePlugin(ConsoleDerived):
8
7
  def __init__(self, context: ConsoleUser):
9
- super().__init__()
8
+ super().__init__(context)
10
9
  self.base_url = f"{context.base_url}/workspaces/current/plugin"
11
- self.session.cookies = context.session.cookies
12
- self.options = context.options
13
10
 
14
11
  def upgrade(self, *plugin_names: str) -> list[dict]:
15
12
  versions = self.latest_version(*plugin_names)
@@ -66,3 +63,20 @@ class ConsolePlugin(API):
66
63
  for name in plugin_names:
67
64
  r = self.get(name)
68
65
  self.uninstall(r[0]['id'])
66
+
67
+
68
class ConsoleTool(ConsoleDerived):
    """Console client rooted at ``/workspaces/current/tool-provider``."""

    def __init__(self, context: ConsoleUser):
        super().__init__(context)
        self.base_url = f"{context.base_url}/workspaces/current/tool-provider"

    def credential_id_by(self, name, owner: str, plugin: str):
        """Return the id of the first credential named ``name``, or None when there is none."""
        matching_ids = (
            credential['id']
            for credential in self.credentials_of(owner, plugin)
            if credential['name'] == name
        )
        return next(matching_ids, None)

    def credentials_of(self, owner: str, plugin: str):
        """Return the ``credentials`` list of the builtin provider ``{owner}/{plugin}/{plugin}``."""
        info_url = f"{self.base_url}/builtin/{owner}/{plugin}/{plugin}/credential/info"
        return self.request(info_url, method="GET")['credentials']
@@ -0,0 +1,50 @@
1
+ from base64 import b64encode
2
+
3
+ from requests.cookies import RequestsCookieJar
4
+
5
+ from davidkhala.ai.agent.dify.console import API
6
+
7
+
8
class ConsoleUser(API):
    """Authenticated console session: logs in and exposes account-level endpoints."""

    def login(self, email, password,
              *,
              remember_me=True,
              language="en-US"
              ) -> RequestsCookieJar:
        """
        Log in with email/password and register the CSRF token as a request header.

        :return: the session cookie jar after login
        """
        url = f"{self.base_url}/login"

        r = self.request(url, "POST", json={
            'email': email,
            'password': b64encode(password.encode()).decode(),  # use base64 from dify 1.11
            'remember_me': remember_me,
            'language': language,
        })
        assert r == {"result": "success"}
        cookies = self.session.cookies
        csrf = cookies.get("csrf_token")
        if csrf is None:
            # Fall back to the `__Host-` prefixed cookie name that `set_tokens` uses.
            csrf = cookies.get("__Host-csrf_token")
        self.options['headers']['x-csrf-token'] = csrf
        return cookies

    def set_tokens(self, *, csrf, access):
        """workaround for federated login"""
        self.session.cookies.set(name="__Host-csrf_token", value=csrf)
        self.session.cookies.set(name="__Host-access_token", value=access)

        self.options['headers']['x-csrf-token'] = csrf

    @property
    def me(self) -> dict:
        """Response of the ``/account/profile`` endpoint."""
        url = f"{self.base_url}/account/profile"
        return self.request(url, "GET")

    @property
    def workspace(self) -> dict:
        """Response of the ``/features`` endpoint."""
        url = f"{self.base_url}/features"
        return self.request(url, "GET")
43
+
44
+
45
class ConsoleDerived(API):
    """Console client that reuses the base URL, cookies and options of a ``ConsoleUser``."""

    def __init__(self, context: ConsoleUser):
        super().__init__()
        # Share (not copy) the context's request options and cookie jar.
        self.options = context.options
        self.session.cookies = context.session.cookies
        self.base_url = context.base_url
+ self.options = context.options