amsdal_ml 0.1.3__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. amsdal_ml/Third-Party Materials - AMSDAL Dependencies - License Notices.md +617 -0
  2. amsdal_ml/__about__.py +1 -1
  3. amsdal_ml/agents/__init__.py +13 -0
  4. amsdal_ml/agents/agent.py +5 -7
  5. amsdal_ml/agents/default_qa_agent.py +108 -143
  6. amsdal_ml/agents/functional_calling_agent.py +233 -0
  7. amsdal_ml/agents/mcp_client_tool.py +46 -0
  8. amsdal_ml/agents/python_tool.py +86 -0
  9. amsdal_ml/agents/retriever_tool.py +17 -8
  10. amsdal_ml/agents/tool_adapters.py +98 -0
  11. amsdal_ml/fileio/base_loader.py +7 -5
  12. amsdal_ml/fileio/openai_loader.py +16 -17
  13. amsdal_ml/mcp_client/base.py +2 -0
  14. amsdal_ml/mcp_client/http_client.py +7 -1
  15. amsdal_ml/mcp_client/stdio_client.py +21 -18
  16. amsdal_ml/mcp_server/server_retriever_stdio.py +8 -11
  17. amsdal_ml/ml_ingesting/__init__.py +29 -0
  18. amsdal_ml/ml_ingesting/default_ingesting.py +49 -51
  19. amsdal_ml/ml_ingesting/embedders/__init__.py +4 -0
  20. amsdal_ml/ml_ingesting/embedders/embedder.py +12 -0
  21. amsdal_ml/ml_ingesting/embedders/openai_embedder.py +30 -0
  22. amsdal_ml/ml_ingesting/embedding_data.py +3 -0
  23. amsdal_ml/ml_ingesting/loaders/__init__.py +6 -0
  24. amsdal_ml/ml_ingesting/loaders/folder_loader.py +52 -0
  25. amsdal_ml/ml_ingesting/loaders/loader.py +28 -0
  26. amsdal_ml/ml_ingesting/loaders/pdf_loader.py +136 -0
  27. amsdal_ml/ml_ingesting/loaders/text_loader.py +44 -0
  28. amsdal_ml/ml_ingesting/model_ingester.py +278 -0
  29. amsdal_ml/ml_ingesting/pipeline.py +131 -0
  30. amsdal_ml/ml_ingesting/pipeline_interface.py +31 -0
  31. amsdal_ml/ml_ingesting/processors/__init__.py +4 -0
  32. amsdal_ml/ml_ingesting/processors/cleaner.py +14 -0
  33. amsdal_ml/ml_ingesting/processors/text_cleaner.py +42 -0
  34. amsdal_ml/ml_ingesting/splitters/__init__.py +4 -0
  35. amsdal_ml/ml_ingesting/splitters/splitter.py +15 -0
  36. amsdal_ml/ml_ingesting/splitters/token_splitter.py +85 -0
  37. amsdal_ml/ml_ingesting/stores/__init__.py +4 -0
  38. amsdal_ml/ml_ingesting/stores/embedding_data.py +63 -0
  39. amsdal_ml/ml_ingesting/stores/store.py +22 -0
  40. amsdal_ml/ml_ingesting/types.py +40 -0
  41. amsdal_ml/ml_models/models.py +96 -4
  42. amsdal_ml/ml_models/openai_model.py +430 -122
  43. amsdal_ml/ml_models/utils.py +7 -0
  44. amsdal_ml/ml_retrievers/__init__.py +17 -0
  45. amsdal_ml/ml_retrievers/adapters.py +93 -0
  46. amsdal_ml/ml_retrievers/default_retriever.py +11 -1
  47. amsdal_ml/ml_retrievers/openai_retriever.py +27 -7
  48. amsdal_ml/ml_retrievers/query_retriever.py +487 -0
  49. amsdal_ml/ml_retrievers/retriever.py +12 -0
  50. amsdal_ml/models/embedding_model.py +7 -7
  51. amsdal_ml/prompts/__init__.py +77 -0
  52. amsdal_ml/prompts/database_query_agent.prompt +14 -0
  53. amsdal_ml/prompts/functional_calling_agent_base.prompt +9 -0
  54. amsdal_ml/prompts/nl_query_filter.prompt +318 -0
  55. amsdal_ml/{agents/promts → prompts}/react_chat.prompt +17 -8
  56. amsdal_ml/utils/__init__.py +5 -0
  57. amsdal_ml/utils/query_utils.py +189 -0
  58. amsdal_ml-0.2.0.dist-info/METADATA +293 -0
  59. amsdal_ml-0.2.0.dist-info/RECORD +72 -0
  60. {amsdal_ml-0.1.3.dist-info → amsdal_ml-0.2.0.dist-info}/WHEEL +1 -1
  61. amsdal_ml/agents/promts/__init__.py +0 -58
  62. amsdal_ml-0.1.3.dist-info/METADATA +0 -69
  63. amsdal_ml-0.1.3.dist-info/RECORD +0 -39
@@ -0,0 +1,7 @@
1
+ from enum import Enum
2
+
3
+
4
class ResponseFormat(Enum):
    """Identifiers for the supported response formats.

    Each member's value is the lowercase string form of its name.
    NOTE(review): presumably these select the output format requested from a
    model -- confirm against the callers.
    """

    # Unstructured text.
    PLAIN_TEXT = "plain_text"

    # A JSON object.
    JSON_OBJECT = "json_object"

    # JSON described by a schema.
    JSON_SCHEMA = "json_schema"
@@ -0,0 +1,17 @@
1
+ from amsdal_ml.ml_retrievers.default_retriever import DefaultRetriever
2
+ from amsdal_ml.ml_retrievers.openai_retriever import OpenAIRetriever
3
+ from amsdal_ml.ml_retrievers.query_retriever import NLQueryExecutor
4
+ from amsdal_ml.ml_retrievers.query_retriever import NLQueryRetriever
5
+ from amsdal_ml.ml_retrievers.retriever import MLRetriever
6
+ from amsdal_ml.ml_retrievers.retriever import RetrievalChunk
7
+ from amsdal_ml.ml_retrievers.retriever import Retriever
8
+
9
+ __all__ = [
10
+ "DefaultRetriever",
11
+ "MLRetriever",
12
+ "NLQueryExecutor",
13
+ "NLQueryRetriever",
14
+ "OpenAIRetriever",
15
+ "RetrievalChunk",
16
+ "Retriever",
17
+ ]
@@ -0,0 +1,93 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from abc import ABC
5
+ from abc import abstractmethod
6
+ from typing import TYPE_CHECKING
7
+ from typing import Any
8
+
9
+ if TYPE_CHECKING:
10
+ from amsdal_ml.ml_models.models import MLModel
11
+ from amsdal_ml.ml_retrievers.query_retriever import FilterCondition
12
+
13
+
14
class RetrieverAdapter(ABC):
    """Interface for model-specific schema adaptation and response parsing.

    Concrete adapters must implement both methods; the class cannot be
    instantiated directly.
    """

    @abstractmethod
    def get_response_schema(self, base_schema: dict[str, Any]) -> dict[str, Any]:
        """Return the base JSON schema adapted for the specific model."""
        raise NotImplementedError

    @abstractmethod
    def parse_response(self, raw_json: str, *, is_schema_based: bool) -> list[FilterCondition]:
        """Turn the model's raw JSON string into a list of ``FilterCondition``.

        ``is_schema_based`` is keyword-only.
        """
        raise NotImplementedError
30
+
31
+
32
class DefaultRetrieverAdapter(RetrieverAdapter):
    """Adapter that passes the schema through unchanged and parses leniently."""

    def get_response_schema(self, base_schema: dict[str, Any]) -> dict[str, Any]:
        """Return ``base_schema`` as-is (the same object, not a copy)."""
        return base_schema

    def parse_response(
        self,
        raw_json: str,
        *,
        is_schema_based: bool,
    ) -> list[FilterCondition]:
        """Parse the model's raw JSON reply into filter conditions.

        Args:
            raw_json: JSON text produced by the model.
            is_schema_based: When true, the decoded payload is validated
                through ``FilterResponse`` and its ``filters`` are returned.
                Otherwise a ``{"filters": [...]}`` object or a bare list of
                condition mappings is accepted.

        Returns:
            The parsed conditions. An empty list is returned when the payload
            has an unrecognised shape, or when decoding, validation or
            construction of a condition fails.
        """
        import logging

        # Imported inside the method: at module level these names are only
        # imported under TYPE_CHECKING (presumably to avoid an import cycle).
        from amsdal_ml.ml_retrievers.query_retriever import FilterCondition
        from amsdal_ml.ml_retrievers.query_retriever import FilterResponse

        try:
            filter_data = json.loads(raw_json)

            if is_schema_based:
                return FilterResponse.model_validate(filter_data).filters

            if isinstance(filter_data, dict) and "filters" in filter_data:
                return [FilterCondition(**cond) for cond in filter_data["filters"]]
            if isinstance(filter_data, list):
                return [FilterCondition(**cond) for cond in filter_data]

            return []
        except Exception:
            # Parsing stays best-effort (callers get an empty list), but the
            # failure is logged so a malformed model reply can be diagnosed
            # instead of vanishing silently.
            logging.getLogger(__name__).warning(
                "Could not parse filter conditions from model response",
                exc_info=True,
            )
            return []
59
+
60
+
61
class OpenAIRetrieverAdapter(DefaultRetrieverAdapter):
    """Adapter that wraps the schema in a strict, named envelope."""

    def get_response_schema(self, base_schema: dict[str, Any]) -> dict[str, Any]:
        """Build the strict schema envelope from ``base_schema``.

        Every nested mapping whose ``"type"`` is ``"object"`` and that does not
        already define ``"additionalProperties"`` gets it set to ``False``.
        The work is done on a deep copy, so the caller's ``base_schema`` is
        left untouched.

        Args:
            base_schema: The JSON schema to adapt.

        Returns:
            ``{"name": "data", "strict": True, "schema": <adapted copy>}``.
            NOTE(review): presumably the payload expected by OpenAI's
            ``json_schema`` response format -- confirm against the caller.
        """
        import copy

        def _forbid_extra_properties(node: Any) -> None:
            # Walk dict values and list items; scalars end the recursion.
            if isinstance(node, dict):
                if node.get("type") == "object":
                    node.setdefault("additionalProperties", False)
                children = node.values()
            elif isinstance(node, list):
                children = node
            else:
                return

            for child in children:
                _forbid_extra_properties(child)

        # Copy first: the original mutated the caller's dict in place, which
        # leaks the adaptation into any schema object the caller reuses.
        schema = copy.deepcopy(base_schema)
        _forbid_extra_properties(schema)

        return {
            "name": "data",
            "strict": True,
            "schema": schema,
        }
85
+
86
+
87
def get_retriever_adapter(model: MLModel) -> RetrieverAdapter:
    """Choose a retriever adapter from the model's class name.

    Returns an ``OpenAIRetrieverAdapter`` when the lower-cased class name of
    ``model`` contains ``"openai"``; otherwise a ``DefaultRetrieverAdapter``.
    """
    is_openai = "openai" in model.__class__.__name__.lower()
    adapter_cls = OpenAIRetrieverAdapter if is_openai else DefaultRetrieverAdapter
    return adapter_cls()
@@ -6,6 +6,7 @@ from abc import abstractmethod
6
6
 
7
7
  from amsdal_models.classes.annotations import CosineDistance
8
8
 
9
+ from amsdal_ml.ml_ingesting.embedders.embedder import Embedder
9
10
  from amsdal_ml.models.embedding_model import EmbeddingModel
10
11
 
11
12
  from .retriever import MLRetriever
@@ -18,14 +19,21 @@ def _default_num_tokens(text: str) -> int:
18
19
 
19
20
  class DefaultRetriever(MLRetriever, ABC):
20
21
  def __init__(
21
- self, *, embedding_model_cls=EmbeddingModel, max_context_tokens: int = 1800, num_tokens_fn=_default_num_tokens
22
+ self,
23
+ *,
24
+ embedding_model_cls=EmbeddingModel,
25
+ max_context_tokens: int = 1800,
26
+ num_tokens_fn=_default_num_tokens,
27
+ embedder: Embedder | None = None,
22
28
  ):
23
29
  self.embedding_model_cls = embedding_model_cls
24
30
  self.max_context_tokens = max_context_tokens
25
31
  self.num_tokens_fn = num_tokens_fn
32
+ self.embedder = embedder
26
33
 
27
34
  @abstractmethod
28
35
  def _embed_query(self, text: str) -> list[float]: ...
36
+
29
37
  @abstractmethod
30
38
  async def _aembed_query(self, text: str) -> list[float]: ...
31
39
 
@@ -66,6 +74,7 @@ class DefaultRetriever(MLRetriever, ABC):
66
74
  raw_text=(r.raw_text or '').strip(),
67
75
  distance=float(getattr(r, 'distance', math.inf)),
68
76
  tags=list(r.tags or []),
77
+ metadata=dict(getattr(r, 'ml_metadata', {}) or {}),
69
78
  )
70
79
  for r in rows
71
80
  ]
@@ -100,6 +109,7 @@ class DefaultRetriever(MLRetriever, ABC):
100
109
  raw_text=(r.raw_text or '').strip(),
101
110
  distance=float(getattr(r, 'distance', math.inf)),
102
111
  tags=list(r.tags or []),
112
+ metadata=dict(getattr(r, 'ml_metadata', {}) or {}),
103
113
  )
104
114
  for r in rows
105
115
  ]
@@ -7,6 +7,7 @@ from openai import AsyncOpenAI
7
7
  from openai import OpenAI
8
8
 
9
9
  from amsdal_ml.ml_config import ml_config
10
+ from amsdal_ml.ml_ingesting.embedders.embedder import Embedder
10
11
 
11
12
  from .default_retriever import DefaultRetriever
12
13
 
@@ -14,26 +15,45 @@ DEFAULT_EMBED_MODEL = ml_config.embed_model_name
14
15
 
15
16
 
16
17
  class OpenAIRetriever(DefaultRetriever):
18
+ client: OpenAI | None = None
19
+ aclient: AsyncOpenAI | None = None
20
+
17
21
  def __init__(
18
22
  self,
19
23
  *,
20
24
  api_key: Optional[str] = None,
21
25
  embed_model: Optional[str] = None,
26
+ embedder: Embedder | None = None,
22
27
  ):
23
- super().__init__()
24
- self.api_key = api_key or ml_config.resolved_openai_key or os.getenv('OPENAI_API_KEY')
25
- if not self.api_key:
26
- msg = 'OPENAI_API_KEY is required for OpenAIRetriever'
27
- raise RuntimeError(msg)
28
+ super().__init__(embedder=embedder)
28
29
 
30
+ self.api_key = api_key or ml_config.resolved_openai_key or os.getenv('OPENAI_API_KEY')
29
31
  self.embed_model = embed_model or DEFAULT_EMBED_MODEL
30
- self.client = OpenAI(api_key=self.api_key)
31
- self.aclient = AsyncOpenAI(api_key=self.api_key)
32
+
33
+ if embedder is None:
34
+ if not self.api_key:
35
+ msg = 'OPENAI_API_KEY is required for OpenAIRetriever'
36
+ raise RuntimeError(msg)
37
+ self.client = OpenAI(api_key=self.api_key)
38
+ self.aclient = AsyncOpenAI(api_key=self.api_key)
39
+ else:
40
+ self.client = None
41
+ self.aclient = None
32
42
 
33
43
  def _embed_query(self, text: str) -> list[float]:
44
+ if self.embedder is not None:
45
+ return self.embedder.embed(text)
46
+ if not self.client:
47
+ msg = 'OpenAI client is not configured'
48
+ raise RuntimeError(msg)
34
49
  resp = self.client.embeddings.create(model=self.embed_model, input=text)
35
50
  return resp.data[0].embedding
36
51
 
37
52
  async def _aembed_query(self, text: str) -> list[float]:
53
+ if self.embedder is not None:
54
+ return await self.embedder.aembed(text)
55
+ if not self.aclient:
56
+ msg = 'Async OpenAI client is not configured'
57
+ raise RuntimeError(msg)
38
58
  resp = await self.aclient.embeddings.create(model=self.embed_model, input=text)
39
59
  return resp.data[0].embedding