amsdal_ml 0.1.4__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.github/workflows/ci.yml +18 -3
  2. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.gitignore +2 -0
  3. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/PKG-INFO +61 -3
  4. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/README.md +57 -0
  5. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/Third-Party Materials - AMSDAL Dependencies - License Notices.md +617 -0
  6. amsdal_ml-0.2.1/amsdal_ml/__about__.py +1 -0
  7. amsdal_ml-0.2.1/amsdal_ml/agents/__init__.py +13 -0
  8. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/agents/agent.py +5 -7
  9. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/agents/default_qa_agent.py +108 -143
  10. amsdal_ml-0.2.1/amsdal_ml/agents/functional_calling_agent.py +233 -0
  11. amsdal_ml-0.2.1/amsdal_ml/agents/mcp_client_tool.py +46 -0
  12. amsdal_ml-0.2.1/amsdal_ml/agents/python_tool.py +86 -0
  13. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/agents/retriever_tool.py +5 -6
  14. amsdal_ml-0.2.1/amsdal_ml/agents/tool_adapters.py +98 -0
  15. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/fileio/base_loader.py +7 -5
  16. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/fileio/openai_loader.py +16 -17
  17. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/mcp_client/base.py +2 -0
  18. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/mcp_client/http_client.py +7 -1
  19. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/mcp_client/stdio_client.py +19 -16
  20. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/mcp_server/server_retriever_stdio.py +8 -11
  21. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/__init__.py +29 -0
  22. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_ingesting/default_ingesting.py +49 -51
  23. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/embedders/__init__.py +4 -0
  24. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/embedders/embedder.py +12 -0
  25. amsdal_ml-0.1.4/amsdal_ml/ml_retrievers/openai_retriever.py → amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/embedders/openai_embedder.py +6 -15
  26. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_ingesting/embedding_data.py +3 -0
  27. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/__init__.py +6 -0
  28. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/folder_loader.py +52 -0
  29. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/loader.py +28 -0
  30. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/pdf_loader.py +136 -0
  31. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/loaders/text_loader.py +44 -0
  32. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/model_ingester.py +278 -0
  33. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/pipeline.py +131 -0
  34. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/pipeline_interface.py +31 -0
  35. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/processors/__init__.py +4 -0
  36. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/processors/cleaner.py +14 -0
  37. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/processors/text_cleaner.py +42 -0
  38. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/splitters/__init__.py +4 -0
  39. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/splitters/splitter.py +15 -0
  40. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/splitters/token_splitter.py +85 -0
  41. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/stores/__init__.py +4 -0
  42. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/stores/embedding_data.py +63 -0
  43. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/stores/store.py +22 -0
  44. amsdal_ml-0.2.1/amsdal_ml/ml_ingesting/types.py +40 -0
  45. amsdal_ml-0.2.1/amsdal_ml/ml_models/models.py +179 -0
  46. amsdal_ml-0.2.1/amsdal_ml/ml_models/openai_model.py +679 -0
  47. amsdal_ml-0.2.1/amsdal_ml/ml_models/utils.py +7 -0
  48. amsdal_ml-0.2.1/amsdal_ml/ml_retrievers/__init__.py +17 -0
  49. amsdal_ml-0.2.1/amsdal_ml/ml_retrievers/adapters.py +93 -0
  50. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_retrievers/default_retriever.py +11 -1
  51. amsdal_ml-0.2.1/amsdal_ml/ml_retrievers/openai_retriever.py +59 -0
  52. amsdal_ml-0.2.1/amsdal_ml/ml_retrievers/query_retriever.py +487 -0
  53. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_retrievers/retriever.py +12 -0
  54. amsdal_ml-0.2.1/amsdal_ml/models/embedding_model.py +21 -0
  55. amsdal_ml-0.2.1/amsdal_ml/prompts/__init__.py +77 -0
  56. amsdal_ml-0.2.1/amsdal_ml/prompts/database_query_agent.prompt +14 -0
  57. amsdal_ml-0.2.1/amsdal_ml/prompts/functional_calling_agent_base.prompt +9 -0
  58. amsdal_ml-0.2.1/amsdal_ml/prompts/nl_query_filter.prompt +318 -0
  59. {amsdal_ml-0.1.4/amsdal_ml/agents/promts → amsdal_ml-0.2.1/amsdal_ml/prompts}/react_chat.prompt +17 -8
  60. amsdal_ml-0.2.1/amsdal_ml/utils/__init__.py +5 -0
  61. amsdal_ml-0.2.1/amsdal_ml/utils/query_utils.py +189 -0
  62. amsdal_ml-0.2.1/change-logs.md +57 -0
  63. amsdal_ml-0.2.1/docker-compose.tests.yml +16 -0
  64. amsdal_ml-0.2.1/latest-changelogs.md +7 -0
  65. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/pyproject.toml +9 -5
  66. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/agents_tests/test_arun.py +2 -2
  67. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/agents_tests/test_astream.py +7 -7
  68. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/agents_tests/test_astream_final_only.py +5 -5
  69. amsdal_ml-0.2.1/tests/agents_tests/test_fakes.py +272 -0
  70. amsdal_ml-0.2.1/tests/agents_tests/test_functional_calling_agent.py +241 -0
  71. amsdal_ml-0.2.1/tests/agents_tests/test_qa_agent_with_nlq_tool.py +268 -0
  72. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/agents_tests/test_tool_call_arguments_async.py +16 -6
  73. amsdal_ml-0.2.1/tests/conftest.py +220 -0
  74. amsdal_ml-0.2.1/tests/fixtures/models/author.py +15 -0
  75. amsdal_ml-0.2.1/tests/fixtures/models/book.py +16 -0
  76. amsdal_ml-0.2.1/tests/fixtures/models/category.py +16 -0
  77. amsdal_ml-0.2.1/tests/fixtures/models/order.py +18 -0
  78. amsdal_ml-0.2.1/tests/fixtures/models/product.py +18 -0
  79. amsdal_ml-0.2.1/tests/fixtures/models/user.py +18 -0
  80. amsdal_ml-0.2.1/tests/fixtures/models/vehicle.py +32 -0
  81. amsdal_ml-0.2.1/tests/ingesting/test_folder_loader_and_rag.py +120 -0
  82. amsdal_ml-0.2.1/tests/ingesting/test_ingestion_components.py +232 -0
  83. amsdal_ml-0.2.1/tests/ingesting/test_model_ingester.py +140 -0
  84. amsdal_ml-0.2.1/tests/nlqueryretriever_tests/mock_tests/__init__.py +0 -0
  85. amsdal_ml-0.2.1/tests/nlqueryretriever_tests/mock_tests/test_retriever_mock.py +651 -0
  86. amsdal_ml-0.2.1/tests/nlqueryretriever_tests/models_tests/__init__.py +0 -0
  87. amsdal_ml-0.2.1/tests/nlqueryretriever_tests/models_tests/test_nl_query_models.py +305 -0
  88. amsdal_ml-0.2.1/tests/nlqueryretriever_tests/schema_tests/__init__.py +0 -0
  89. amsdal_ml-0.2.1/tests/nlqueryretriever_tests/schema_tests/test_comprehensive_schema.py +348 -0
  90. amsdal_ml-0.2.1/tests/nlqueryretriever_tests/schema_tests/test_nested_list_skipping.py +135 -0
  91. amsdal_ml-0.2.1/tests/test_files/.gitkeep +0 -0
  92. amsdal_ml-0.2.1/tests/test_files/pdf/Aspida.pdf +0 -0
  93. amsdal_ml-0.2.1/tests/test_files/pdf/Nassau.pdf +0 -0
  94. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/tests/test_openai_model.py +64 -2
  95. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/uv.lock +1030 -768
  96. amsdal_ml-0.1.4/amsdal_ml/__about__.py +0 -1
  97. amsdal_ml-0.1.4/amsdal_ml/agents/promts/__init__.py +0 -58
  98. amsdal_ml-0.1.4/amsdal_ml/ml_models/models.py +0 -87
  99. amsdal_ml-0.1.4/amsdal_ml/ml_models/openai_model.py +0 -371
  100. amsdal_ml-0.1.4/amsdal_ml/models/embedding_model.py +0 -21
  101. amsdal_ml-0.1.4/change-logs.md +0 -34
  102. amsdal_ml-0.1.4/latest-changelogs.md +0 -9
  103. amsdal_ml-0.1.4/tests/agents_tests/test_fakes.py +0 -173
  104. amsdal_ml-0.1.4/tests/conftest.py +0 -105
  105. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.amsdal/.dependencies +0 -0
  106. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.amsdal/.environment +0 -0
  107. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.amsdal/.secrets +0 -0
  108. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.amsdal-cli +0 -0
  109. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.github/workflows/release.yml +0 -0
  110. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/.github/workflows/tag_check.yml +0 -0
  111. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/CLAUDE.md +0 -0
  112. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/RELEASE.md +0 -0
  113. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/__init__.py +0 -0
  114. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/app.py +0 -0
  115. {amsdal_ml-0.1.4/amsdal_ml/agents → amsdal_ml-0.2.1/amsdal_ml/fileio}/__init__.py +0 -0
  116. {amsdal_ml-0.1.4/amsdal_ml/fileio → amsdal_ml-0.2.1/amsdal_ml/mcp_client}/__init__.py +0 -0
  117. {amsdal_ml-0.1.4/amsdal_ml/mcp_client → amsdal_ml-0.2.1/amsdal_ml/mcp_server}/__init__.py +0 -0
  118. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/migrations/0000_initial.py +0 -0
  119. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_config.py +0 -0
  120. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_ingesting/ingesting.py +0 -0
  121. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/ml_ingesting/openai_ingesting.py +0 -0
  122. {amsdal_ml-0.1.4/amsdal_ml/mcp_server → amsdal_ml-0.2.1/amsdal_ml/ml_models}/__init__.py +0 -0
  123. {amsdal_ml-0.1.4/amsdal_ml/ml_ingesting → amsdal_ml-0.2.1/amsdal_ml/models}/__init__.py +0 -0
  124. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/amsdal_ml/py.typed +0 -0
  125. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/config.yml +0 -0
  126. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/license_check.py +0 -0
  127. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/scripts/release.sh +0 -0
  128. {amsdal_ml-0.1.4 → amsdal_ml-0.2.1}/scripts/tag_check.sh +0 -0
  129. {amsdal_ml-0.1.4/amsdal_ml/ml_models → amsdal_ml-0.2.1/tests}/__init__.py +0 -0
  130. {amsdal_ml-0.1.4/amsdal_ml/ml_retrievers → amsdal_ml-0.2.1/tests/agents_tests}/__init__.py +0 -0
  131. {amsdal_ml-0.1.4/amsdal_ml → amsdal_ml-0.2.1/tests/fixtures}/models/__init__.py +0 -0
  132. {amsdal_ml-0.1.4/tests → amsdal_ml-0.2.1/tests/ingesting}/__init__.py +0 -0
  133. {amsdal_ml-0.1.4/tests/agents_tests → amsdal_ml-0.2.1/tests/nlqueryretriever_tests}/__init__.py +0 -0
@@ -23,7 +23,7 @@ jobs:
23
23
  python license_check.py
24
24
 
25
25
  test-lint:
26
- name: Run tests and check style
26
+ name: Run tests and check style (Python ${{ matrix.python-version }}, ${{ matrix.database-backend }})
27
27
  needs: [license-check]
28
28
  runs-on: self-hosted
29
29
  strategy:
@@ -31,6 +31,21 @@ jobs:
31
31
  fail-fast: false
32
32
  matrix:
33
33
  python-version: ["3.11", "3.12"]
34
+ database-backend: ["sqlite", "postgres"]
35
+ services:
36
+ postgres:
37
+ image: pgvector/pgvector:pg16
38
+ env:
39
+ POSTGRES_USER: postgres
40
+ POSTGRES_PASSWORD: example
41
+ POSTGRES_DB: postgres
42
+ ports:
43
+ - 5432:5432
44
+ options: >-
45
+ --health-cmd pg_isready
46
+ --health-interval 10s
47
+ --health-timeout 5s
48
+ --health-retries 5
34
49
  env:
35
50
  PYTHON: ${{ matrix.python-version }}
36
51
  DEPS: yes
@@ -54,9 +69,9 @@ jobs:
54
69
  hatch run sync
55
70
 
56
71
  - name: Run style checks
57
- if: always()
72
+ if: always() && matrix.database-backend == 'sqlite'
58
73
  run: hatch run all
59
74
 
60
75
  - name: Run tests
61
76
  if: always()
62
- run: hatch run cov tests/
77
+ run: hatch run cov tests/ -- --database_backend=${{ matrix.database-backend }}
@@ -1,6 +1,7 @@
1
1
  .venv/
2
2
  venv/
3
3
  /warehouse
4
+ .python-version
4
5
 
5
6
  __pycache__/
6
7
  *.py[cod]
@@ -34,3 +35,4 @@ Thumbs.db
34
35
  /models/
35
36
  /fixtures/
36
37
  /static/
38
+ .tmp
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: amsdal_ml
3
- Version: 0.1.4
3
+ Version: 0.2.1
4
4
  Summary: amsdal_ml plugin for AMSDAL Framework
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: aiohttp==3.12.15
@@ -11,8 +11,9 @@ Requires-Dist: amsdal-utils>=0.5.4
11
11
  Requires-Dist: amsdal>=0.5.6
12
12
  Requires-Dist: mcp>=0.1
13
13
  Requires-Dist: openai==1.100.2
14
- Requires-Dist: pydantic-settings==2.10.1
15
- Requires-Dist: pydantic==2.11.7
14
+ Requires-Dist: pydantic-settings~=2.12
15
+ Requires-Dist: pydantic~=2.12
16
+ Requires-Dist: pymupdf>=1.24.10
16
17
  Description-Content-Type: text/markdown
17
18
 
18
19
  # AMSDAL ML
@@ -130,6 +131,53 @@ async for chunk in agent.astream('What is semantic search?'):
130
131
  print(chunk, end='', flush=True)
131
132
  ```
132
133
 
134
+ ### 5. Functional Calling Agent with Python Tools
135
+
136
+ ```python
137
+ from amsdal_ml.agents.functional_calling_agent import FunctionalCallingAgent
138
+ from amsdal_ml.agents.python_tool import PythonTool
139
+ from amsdal_ml.ml_models.openai_model import OpenAIModel
140
+
141
+ llm = OpenAIModel()
142
+ agent = FunctionalCallingAgent(model=llm, tools=[search_tool, render_tool])
143
+ result = await agent.arun(user_query="Find products with price > 100", history=[])
144
+ ```
145
+
146
+ ### 6. Natural Language Query Retriever
147
+
148
+ ```python
149
+ from amsdal_ml.ml_retrievers.query_retriever import NLQueryRetriever
150
+
151
+ retriever = NLQueryRetriever(llm=llm, queryset=Product.objects.all())
152
+ documents = await retriever.invoke("Show me red products", limit=10)
153
+ ```
154
+
155
+ ### 7. Document Ingestion Pipeline
156
+
157
+ ```python
158
+ from amsdal_ml.ml_ingesting import ModelIngester
159
+ from amsdal_ml.ml_ingesting.pipeline import DefaultIngestionPipeline
160
+ from amsdal_ml.ml_ingesting.loaders.pdf_loader import PdfLoader
161
+ from amsdal_ml.ml_ingesting.processors.text_cleaner import TextCleaner
162
+ from amsdal_ml.ml_ingesting.splitters.token_splitter import TokenSplitter
163
+ from amsdal_ml.ml_ingesting.embedders.openai_embedder import OpenAIEmbedder
164
+ from amsdal_ml.ml_ingesting.stores.embedding_data import EmbeddingDataStore
165
+
166
+ pipeline = DefaultIngestionPipeline(
167
+ loader=PdfLoader(), # Uses pymupdf for PDF processing
168
+ cleaner=TextCleaner(),
169
+ splitter=TokenSplitter(max_tokens=800, overlap_tokens=80),
170
+ embedder=OpenAIEmbedder(),
171
+ store=EmbeddingDataStore(),
172
+ )
173
+
174
+ ingester = ModelIngester(
175
+ pipeline=pipeline,
176
+ base_tags=["document"],
177
+ base_metadata={"source": "pdf"},
178
+ )
179
+ ```
180
+
133
181
  ## Architecture
134
182
 
135
183
  ### Core Components
@@ -139,6 +187,16 @@ async for chunk in agent.astream('What is semantic search?'):
139
187
  - **`MLRetriever`**: Semantic similarity search with tag-based filtering
140
188
  - **`Agent`**: Q&A and task-oriented agents with streaming and citations
141
189
  - **`EmbeddingModel`**: Database model storing 1536-dimensional vectors linked to source objects
190
+ - **`PythonTool`**: Tool for executing Python functions within agents
191
+ - **`FunctionalCallingAgent`**: Agent specialized in functional calling with configurable tools
192
+ - **`NLQueryRetriever`**: Retriever for natural language queries on AMSDAL querysets
193
+ - **`DefaultIngestionPipeline`**: Pipeline for document ingestion including loader, cleaner, splitter, embedder, and store
194
+ - **`ModelIngester`**: High-level ingester for processing models with customizable pipelines and metadata
195
+ - **`PdfLoader`**: Document loader using pymupdf for PDF processing
196
+ - **`TextCleaner`**: Processor for cleaning and normalizing text
197
+ - **`TokenSplitter`**: Splitter for dividing text into chunks based on token count
198
+ - **`OpenAIEmbedder`**: Embedder for generating embeddings via OpenAI API
199
+ - **`EmbeddingDataStore`**: Store for saving embedding data linked to source objects
142
200
  - **MCP Server/Client**: Expose retrievers as tools or consume external MCP services
143
201
 
144
202
  ### Configuration
@@ -113,6 +113,53 @@ async for chunk in agent.astream('What is semantic search?'):
113
113
  print(chunk, end='', flush=True)
114
114
  ```
115
115
 
116
+ ### 5. Functional Calling Agent with Python Tools
117
+
118
+ ```python
119
+ from amsdal_ml.agents.functional_calling_agent import FunctionalCallingAgent
120
+ from amsdal_ml.agents.python_tool import PythonTool
121
+ from amsdal_ml.ml_models.openai_model import OpenAIModel
122
+
123
+ llm = OpenAIModel()
124
+ agent = FunctionalCallingAgent(model=llm, tools=[search_tool, render_tool])
125
+ result = await agent.arun(user_query="Find products with price > 100", history=[])
126
+ ```
127
+
128
+ ### 6. Natural Language Query Retriever
129
+
130
+ ```python
131
+ from amsdal_ml.ml_retrievers.query_retriever import NLQueryRetriever
132
+
133
+ retriever = NLQueryRetriever(llm=llm, queryset=Product.objects.all())
134
+ documents = await retriever.invoke("Show me red products", limit=10)
135
+ ```
136
+
137
+ ### 7. Document Ingestion Pipeline
138
+
139
+ ```python
140
+ from amsdal_ml.ml_ingesting import ModelIngester
141
+ from amsdal_ml.ml_ingesting.pipeline import DefaultIngestionPipeline
142
+ from amsdal_ml.ml_ingesting.loaders.pdf_loader import PdfLoader
143
+ from amsdal_ml.ml_ingesting.processors.text_cleaner import TextCleaner
144
+ from amsdal_ml.ml_ingesting.splitters.token_splitter import TokenSplitter
145
+ from amsdal_ml.ml_ingesting.embedders.openai_embedder import OpenAIEmbedder
146
+ from amsdal_ml.ml_ingesting.stores.embedding_data import EmbeddingDataStore
147
+
148
+ pipeline = DefaultIngestionPipeline(
149
+ loader=PdfLoader(), # Uses pymupdf for PDF processing
150
+ cleaner=TextCleaner(),
151
+ splitter=TokenSplitter(max_tokens=800, overlap_tokens=80),
152
+ embedder=OpenAIEmbedder(),
153
+ store=EmbeddingDataStore(),
154
+ )
155
+
156
+ ingester = ModelIngester(
157
+ pipeline=pipeline,
158
+ base_tags=["document"],
159
+ base_metadata={"source": "pdf"},
160
+ )
161
+ ```
162
+
116
163
  ## Architecture
117
164
 
118
165
  ### Core Components
@@ -122,6 +169,16 @@ async for chunk in agent.astream('What is semantic search?'):
122
169
  - **`MLRetriever`**: Semantic similarity search with tag-based filtering
123
170
  - **`Agent`**: Q&A and task-oriented agents with streaming and citations
124
171
  - **`EmbeddingModel`**: Database model storing 1536-dimensional vectors linked to source objects
172
+ - **`PythonTool`**: Tool for executing Python functions within agents
173
+ - **`FunctionalCallingAgent`**: Agent specialized in functional calling with configurable tools
174
+ - **`NLQueryRetriever`**: Retriever for natural language queries on AMSDAL querysets
175
+ - **`DefaultIngestionPipeline`**: Pipeline for document ingestion including loader, cleaner, splitter, embedder, and store
176
+ - **`ModelIngester`**: High-level ingester for processing models with customizable pipelines and metadata
177
+ - **`PdfLoader`**: Document loader using pymupdf for PDF processing
178
+ - **`TextCleaner`**: Processor for cleaning and normalizing text
179
+ - **`TokenSplitter`**: Splitter for dividing text into chunks based on token count
180
+ - **`OpenAIEmbedder`**: Embedder for generating embeddings via OpenAI API
181
+ - **`EmbeddingDataStore`**: Store for saving embedding data linked to source objects
125
182
  - **MCP Server/Client**: Expose retrievers as tools or consume external MCP services
126
183
 
127
184
  ### Configuration