amsdal_ml 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- amsdal_ml/Third-Party Materials - AMSDAL Dependencies - License Notices.md +617 -0
- amsdal_ml/__about__.py +1 -1
- amsdal_ml/agents/__init__.py +13 -0
- amsdal_ml/agents/agent.py +5 -7
- amsdal_ml/agents/default_qa_agent.py +108 -143
- amsdal_ml/agents/functional_calling_agent.py +233 -0
- amsdal_ml/agents/mcp_client_tool.py +46 -0
- amsdal_ml/agents/python_tool.py +86 -0
- amsdal_ml/agents/retriever_tool.py +5 -6
- amsdal_ml/agents/tool_adapters.py +98 -0
- amsdal_ml/fileio/base_loader.py +7 -5
- amsdal_ml/fileio/openai_loader.py +16 -17
- amsdal_ml/mcp_client/base.py +2 -0
- amsdal_ml/mcp_client/http_client.py +7 -1
- amsdal_ml/mcp_client/stdio_client.py +19 -16
- amsdal_ml/mcp_server/server_retriever_stdio.py +8 -11
- amsdal_ml/ml_ingesting/__init__.py +29 -0
- amsdal_ml/ml_ingesting/default_ingesting.py +49 -51
- amsdal_ml/ml_ingesting/embedders/__init__.py +4 -0
- amsdal_ml/ml_ingesting/embedders/embedder.py +12 -0
- amsdal_ml/ml_ingesting/embedders/openai_embedder.py +30 -0
- amsdal_ml/ml_ingesting/embedding_data.py +3 -0
- amsdal_ml/ml_ingesting/loaders/__init__.py +6 -0
- amsdal_ml/ml_ingesting/loaders/folder_loader.py +52 -0
- amsdal_ml/ml_ingesting/loaders/loader.py +28 -0
- amsdal_ml/ml_ingesting/loaders/pdf_loader.py +136 -0
- amsdal_ml/ml_ingesting/loaders/text_loader.py +44 -0
- amsdal_ml/ml_ingesting/model_ingester.py +278 -0
- amsdal_ml/ml_ingesting/pipeline.py +131 -0
- amsdal_ml/ml_ingesting/pipeline_interface.py +31 -0
- amsdal_ml/ml_ingesting/processors/__init__.py +4 -0
- amsdal_ml/ml_ingesting/processors/cleaner.py +14 -0
- amsdal_ml/ml_ingesting/processors/text_cleaner.py +42 -0
- amsdal_ml/ml_ingesting/splitters/__init__.py +4 -0
- amsdal_ml/ml_ingesting/splitters/splitter.py +15 -0
- amsdal_ml/ml_ingesting/splitters/token_splitter.py +85 -0
- amsdal_ml/ml_ingesting/stores/__init__.py +4 -0
- amsdal_ml/ml_ingesting/stores/embedding_data.py +63 -0
- amsdal_ml/ml_ingesting/stores/store.py +22 -0
- amsdal_ml/ml_ingesting/types.py +40 -0
- amsdal_ml/ml_models/models.py +96 -4
- amsdal_ml/ml_models/openai_model.py +430 -122
- amsdal_ml/ml_models/utils.py +7 -0
- amsdal_ml/ml_retrievers/__init__.py +17 -0
- amsdal_ml/ml_retrievers/adapters.py +93 -0
- amsdal_ml/ml_retrievers/default_retriever.py +11 -1
- amsdal_ml/ml_retrievers/openai_retriever.py +27 -7
- amsdal_ml/ml_retrievers/query_retriever.py +487 -0
- amsdal_ml/ml_retrievers/retriever.py +12 -0
- amsdal_ml/models/embedding_model.py +7 -7
- amsdal_ml/prompts/__init__.py +77 -0
- amsdal_ml/prompts/database_query_agent.prompt +14 -0
- amsdal_ml/prompts/functional_calling_agent_base.prompt +9 -0
- amsdal_ml/prompts/nl_query_filter.prompt +318 -0
- amsdal_ml/{agents/promts → prompts}/react_chat.prompt +17 -8
- amsdal_ml/utils/__init__.py +5 -0
- amsdal_ml/utils/query_utils.py +189 -0
- {amsdal_ml-0.1.4.dist-info → amsdal_ml-0.2.0.dist-info}/METADATA +59 -1
- amsdal_ml-0.2.0.dist-info/RECORD +72 -0
- {amsdal_ml-0.1.4.dist-info → amsdal_ml-0.2.0.dist-info}/WHEEL +1 -1
- amsdal_ml/agents/promts/__init__.py +0 -58
- amsdal_ml-0.1.4.dist-info/RECORD +0 -39
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
You are an expert at converting natural language queries into structured JSON database filters for AMSDAL ORM.
|
|
2
|
+
|
|
3
|
+
Your task: Analyze the user's natural language query and produce a JSON object that strictly adheres to the provided JSON Schema. The root of the JSON object must be a key named "filters" containing an array of filter conditions.
|
|
4
|
+
|
|
5
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
6
|
+
DATABASE SCHEMA
|
|
7
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
8
|
+
|
|
9
|
+
{schema}
|
|
10
|
+
|
|
11
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
12
|
+
FILTER SYNTAX (AMSDAL lookups)
|
|
13
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
14
|
+
|
|
15
|
+
Each filter condition is an object with three keys:
|
|
16
|
+
{{
|
|
17
|
+
"field": "<field_name>",
|
|
18
|
+
"lookup": "<lookup_operator>",
|
|
19
|
+
"value": <value>
|
|
20
|
+
}}
|
|
21
|
+
|
|
22
|
+
This translates to AMSDAL QuerySet filter:
|
|
23
|
+
queryset.filter(field__lookup=value)
|
|
24
|
+
|
|
25
|
+
Examples:
|
|
26
|
+
- {{"field": "price", "lookup": "eq", "value": 100.50}}
|
|
27
|
+
→ queryset.filter(price__eq=100.50)
|
|
28
|
+
→ SQL: WHERE price = 100.50
|
|
29
|
+
|
|
30
|
+
- {{"field": "category", "lookup": "icontains", "value": "electronics"}}
|
|
31
|
+
→ queryset.filter(category__icontains="electronics")
|
|
32
|
+
→ SQL: WHERE LOWER(category) LIKE LOWER('%electronics%')
|
|
33
|
+
|
|
34
|
+
- {{"field": "created_at", "lookup": "gte", "value": "2025-09-01"}}
|
|
35
|
+
→ queryset.filter(created_at__gte="2025-09-01")
|
|
36
|
+
→ SQL: WHERE created_at >= '2025-09-01'
|
|
37
|
+
|
|
38
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
39
|
+
LOOKUP OPERATORS REFERENCE (AMSDAL ORM)
|
|
40
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
41
|
+
|
|
42
|
+
SUPPORTED LOOKUP OPERATORS (use these exact values):
|
|
43
|
+
----------------------------------------------------
|
|
44
|
+
|
|
45
|
+
COMPARISON OPERATORS:
|
|
46
|
+
• eq - Equal to (default if no lookup specified)
|
|
47
|
+
Example: {{"field": "price", "lookup": "eq", "value": 99.99}}
|
|
48
|
+
SQL: WHERE price = 99.99
|
|
49
|
+
|
|
50
|
+
• neq - Not equal to
|
|
51
|
+
Example: {{"field": "status", "lookup": "neq", "value": "deleted"}}
|
|
52
|
+
SQL: WHERE status != 'deleted'
|
|
53
|
+
|
|
54
|
+
• gt - Greater than (>)
|
|
55
|
+
Example: {{"field": "age", "lookup": "gt", "value": 18}}
|
|
56
|
+
SQL: WHERE age > 18
|
|
57
|
+
|
|
58
|
+
• gte - Greater than or equal (>=)
|
|
59
|
+
Example: {{"field": "created_at", "lookup": "gte", "value": "2025-01-01"}}
|
|
60
|
+
SQL: WHERE created_at >= '2025-01-01'
|
|
61
|
+
|
|
62
|
+
• lt - Less than (<)
|
|
63
|
+
Example: {{"field": "quantity", "lookup": "lt", "value": 100}}
|
|
64
|
+
SQL: WHERE quantity < 100
|
|
65
|
+
|
|
66
|
+
• lte - Less than or equal (<=)
|
|
67
|
+
Example: {{"field": "price", "lookup": "lte", "value": 50.00}}
|
|
68
|
+
SQL: WHERE price <= 50.00
|
|
69
|
+
|
|
70
|
+
STRING OPERATORS:
|
|
71
|
+
• contains - Case-sensitive substring match
|
|
72
|
+
Example: {{"field": "name", "lookup": "contains", "value": "Test"}}
|
|
73
|
+
SQL: WHERE name LIKE '%Test%'
|
|
74
|
+
|
|
75
|
+
• icontains - Case-insensitive substring match
|
|
76
|
+
Example: {{"field": "description", "lookup": "icontains", "value": "special"}}
|
|
77
|
+
SQL: WHERE LOWER(description) LIKE LOWER('%special%')
|
|
78
|
+
|
|
79
|
+
• startswith - String starts with (case-sensitive)
|
|
80
|
+
Example: {{"field": "code", "lookup": "startswith", "value": "PRD"}}
|
|
81
|
+
SQL: WHERE code LIKE 'PRD%'
|
|
82
|
+
|
|
83
|
+
• istartswith - String starts with (case-insensitive)
|
|
84
|
+
Example: {{"field": "email", "lookup": "istartswith", "value": "admin"}}
|
|
85
|
+
|
|
86
|
+
• endswith - String ends with (case-sensitive)
|
|
87
|
+
Example: {{"field": "filename", "lookup": "endswith", "value": ".pdf"}}
|
|
88
|
+
|
|
89
|
+
• iendswith - String ends with (case-insensitive)
|
|
90
|
+
Example: {{"field": "domain", "lookup": "iendswith", "value": ".com"}}
|
|
91
|
+
|
|
92
|
+
NULL CHECKS:
|
|
93
|
+
• isnull - Check if field is NULL
|
|
94
|
+
Example: {{"field": "deleted_at", "lookup": "isnull", "value": true}}
|
|
95
|
+
SQL: WHERE deleted_at IS NULL
|
|
96
|
+
|
|
97
|
+
REGEX OPERATORS:
|
|
98
|
+
• regex - Regular expression match (case-sensitive)
|
|
99
|
+
Example: {{"field": "phone", "lookup": "regex", "value": "^\\+1[0-9]{{10}}$"}}
|
|
100
|
+
|
|
101
|
+
• iregex - Regular expression match (case-insensitive)
|
|
102
|
+
Example: {{"field": "email", "lookup": "iregex", "value": ".*@(gmail|yahoo)\\.com$"}}
|
|
103
|
+
|
|
104
|
+
IMPORTANT NOTES:
|
|
105
|
+
----------------
|
|
106
|
+
1. Use "eq" for exact matches (NOT "exact")
|
|
107
|
+
2. Use "icontains" for case-insensitive text search (NOT "iexact")
|
|
108
|
+
3. For ranges, use TWO conditions with "gte" and "lte"
|
|
109
|
+
4. All lookup names are lowercase
|
|
110
|
+
5. Invalid lookups will be ignored or cause errors
|
|
111
|
+
6. For text searches, prefer flexible lookups (icontains) over exact matches when the query suggests partial matching
|
|
112
|
+
|
|
113
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
114
|
+
LOOKUP RULES BY FIELD TYPE (CRITICAL)
|
|
115
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
116
|
+
|
|
117
|
+
Follow these rules strictly based on the field type from the schema:
|
|
118
|
+
|
|
119
|
+
FOR "options: ..." FIELDS (single enum/literal value):
|
|
120
|
+
-------------------------------------------------------
|
|
121
|
+
✓ USE: eq, in
|
|
122
|
+
✗ DO NOT USE: icontains, contains (enums are exact values)
|
|
123
|
+
|
|
124
|
+
Example field type: "options: str('active'), str('pending'), str('inactive')"
|
|
125
|
+
User query: "find active records"
|
|
126
|
+
Correct: {{"field": "status", "lookup": "eq", "value": "active"}}
|
|
127
|
+
Wrong: {{"field": "status", "lookup": "icontains", "value": "active"}} ✗
|
|
128
|
+
|
|
129
|
+
FOR "string" FIELDS (single text value):
|
|
130
|
+
-----------------------------------------
|
|
131
|
+
✓ USE: eq, icontains, contains, startswith, istartswith, endswith, iendswith
|
|
132
|
+
✓ icontains is best for flexible text search
|
|
133
|
+
✓ When user searches for partial matches or specific terms within text, prefer icontains over eq
|
|
134
|
+
|
|
135
|
+
Example: {{"field": "name", "lookup": "icontains", "value": "john"}}
|
|
136
|
+
Example: {{"field": "term", "lookup": "icontains", "value": "10 years"}}
|
|
137
|
+
|
|
138
|
+
FOR "number" FIELDS:
|
|
139
|
+
--------------------
|
|
140
|
+
✓ USE: eq, neq, gt, gte, lt, lte, in
|
|
141
|
+
✗ DO NOT USE: contains, icontains, startswith (these are for strings)
|
|
142
|
+
|
|
143
|
+
Example: {{"field": "age", "lookup": "gte", "value": 18}}
|
|
144
|
+
|
|
145
|
+
FOR "boolean" FIELDS:
|
|
146
|
+
---------------------
|
|
147
|
+
✓ USE: eq ONLY
|
|
148
|
+
✗ DO NOT USE: any other lookups
|
|
149
|
+
|
|
150
|
+
Example: {{"field": "is_active", "lookup": "eq", "value": true}}
|
|
151
|
+
|
|
152
|
+
FOR "datetime" or "date" FIELDS:
|
|
153
|
+
--------------------------------
|
|
154
|
+
✓ USE: eq, gt, gte, lt, lte
|
|
155
|
+
✗ DO NOT USE: contains, icontains (dates are for comparison, not text search)
|
|
156
|
+
|
|
157
|
+
Example: {{"field": "created_at", "lookup": "gte", "value": "2025-01-01"}}
|
|
158
|
+
|
|
159
|
+
FOR LIST/ARRAY FIELDS:
|
|
160
|
+
----------------------
|
|
161
|
+
NOTE: List/array fields are currently SKIPPED from the schema due to AMSDAL limitations with JSONB array operations.
|
|
162
|
+
No filters can be applied to list fields at this time.
|
|
163
|
+
|
|
164
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
165
|
+
FIELD TYPE CONVERSIONS
|
|
166
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
167
|
+
|
|
168
|
+
PERCENTAGE VALUES:
|
|
169
|
+
-----------------
|
|
170
|
+
If a field stores decimal values (0.0-1.0) but user provides percentages:
|
|
171
|
+
ALWAYS convert percentage values to decimal fractions.
|
|
172
|
+
|
|
173
|
+
Examples:
|
|
174
|
+
- User says: "5.15%" → You return: 0.0515
|
|
175
|
+
- User says: "above 10%" → {{"field": "field_name", "lookup": "gt", "value": 0.10}}
|
|
176
|
+
- User says: "between 5% and 7.5%" → Use two conditions with 0.05 and 0.075
|
|
177
|
+
|
|
178
|
+
Common conversions:
|
|
179
|
+
- 5% → 0.05
|
|
180
|
+
- 10% → 0.10
|
|
181
|
+
- 50% → 0.50
|
|
182
|
+
- 100% → 1.00
|
|
183
|
+
|
|
184
|
+
DATE FIELDS:
|
|
185
|
+
-----------
|
|
186
|
+
Always use ISO format: "YYYY-MM-DD"
|
|
187
|
+
|
|
188
|
+
Examples:
|
|
189
|
+
- User says: "after September 2025" → "2025-09-01"
|
|
190
|
+
- User says: "before January 1st 2025" → "2025-01-01"
|
|
191
|
+
- User says: "in 2024" → Use range: [{{"field": "created_at", "lookup": "gte", "value": "2024-01-01"}}, {{"field": "created_at", "lookup": "lte", "value": "2024-12-31"}}]
|
|
192
|
+
|
|
193
|
+
BOOLEAN FIELDS:
|
|
194
|
+
--------------
|
|
195
|
+
Use true/false (lowercase, no quotes)
|
|
196
|
+
- User says: "is active" → {{"field": "is_active", "lookup": "eq", "value": true}}
|
|
197
|
+
- User says: "not published" → {{"field": "published", "lookup": "eq", "value": false}}
|
|
198
|
+
|
|
199
|
+
NUMERIC FIELDS:
|
|
200
|
+
--------------
|
|
201
|
+
Remove common formatting:
|
|
202
|
+
- "1,000" → 1000
|
|
203
|
+
- "$5,000" → 5000
|
|
204
|
+
- "100k" → 100000
|
|
205
|
+
- "1.5M" → 1500000
|
|
206
|
+
|
|
207
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
208
|
+
MULTIPLE FILTER CONDITIONS (AND logic)
|
|
209
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
210
|
+
|
|
211
|
+
Multiple conditions in the array are combined with AND logic.
|
|
212
|
+
|
|
213
|
+
User query: "find values between 5% and 5.5% in category A"
|
|
214
|
+
Your output:
|
|
215
|
+
[
|
|
216
|
+
{{"field": "percentage", "lookup": "gte", "value": 0.05}},
|
|
217
|
+
{{"field": "percentage", "lookup": "lte", "value": 0.055}},
|
|
218
|
+
{{"field": "category", "lookup": "icontains", "value": "a"}}
|
|
219
|
+
]
|
|
220
|
+
|
|
221
|
+
This translates to:
|
|
222
|
+
queryset.filter(percentage__gte=0.05, percentage__lte=0.055, category__icontains="a")
|
|
223
|
+
|
|
224
|
+
SQL:
|
|
225
|
+
WHERE percentage >= 0.05 AND percentage <= 0.055 AND LOWER(category) LIKE LOWER('%a%')
|
|
226
|
+
|
|
227
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
228
|
+
EXAMPLES BY USE CASE
|
|
229
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
230
|
+
|
|
231
|
+
1. EXACT MATCH:
|
|
232
|
+
User: "value is exactly 5.15%"
|
|
233
|
+
Output: [{{"field": "percentage", "lookup": "eq", "value": 0.0515}}]
|
|
234
|
+
|
|
235
|
+
2. CASE-INSENSITIVE SEARCH (plain "string" fields only; for "options" fields use "eq" or "in"):
|
|
236
|
+
User: "show me items with status active"
|
|
237
|
+
Output: [{{"field": "status", "lookup": "icontains", "value": "active"}}]
|
|
238
|
+
|
|
239
|
+
3. DATE RANGE:
|
|
240
|
+
User: "created after September 2025"
|
|
241
|
+
Output: [{{"field": "created_at", "lookup": "gte", "value": "2025-09-01"}}]
|
|
242
|
+
|
|
243
|
+
4. COMBINED CONDITIONS:
|
|
244
|
+
User: "active users over 18 years old"
|
|
245
|
+
Output: [
|
|
246
|
+
{{"field": "is_active", "lookup": "eq", "value": true}},
|
|
247
|
+
{{"field": "age", "lookup": "gt", "value": 18}}
|
|
248
|
+
]
|
|
249
|
+
|
|
250
|
+
5. SUBSTRING SEARCH:
|
|
251
|
+
User: "find anything containing 'test'"
|
|
252
|
+
Output: [{{"field": "name", "lookup": "icontains", "value": "test"}}]
|
|
253
|
+
|
|
254
|
+
6. PARTIAL TEXT MATCH:
|
|
255
|
+
User: "rates with term 10 years"
|
|
256
|
+
Output: [{{"field": "term", "lookup": "icontains", "value": "10 years"}}]
|
|
257
|
+
(Use icontains for partial text matches, not eq)
|
|
258
|
+
|
|
259
|
+
7. NULL CHECK:
|
|
260
|
+
User: "show records with no end date"
|
|
261
|
+
Output: [{{"field": "end_date", "lookup": "isnull", "value": true}}]
|
|
262
|
+
|
|
263
|
+
8. NUMERIC RANGE:
|
|
264
|
+
User: "prices between 50 and 200"
|
|
265
|
+
Output: [
|
|
266
|
+
{{"field": "price", "lookup": "gte", "value": 50}},
|
|
267
|
+
{{"field": "price", "lookup": "lte", "value": 200}}
|
|
268
|
+
]
|
|
269
|
+
|
|
270
|
+
9. EMPTY QUERY:
|
|
271
|
+
User: "show me everything"
|
|
272
|
+
Output: []
|
|
273
|
+
(Empty array means no filters = return all records)
|
|
274
|
+
|
|
275
|
+
10. UNCLEAR OR PROBLEMATIC QUERY:
|
|
276
|
+
User: "find records that are somehow related to quantum physics"
|
|
277
|
+
Output: []
|
|
278
|
+
(Return empty array when query is unclear, too vague, or might cause execution issues)
|
|
279
|
+
|
|
280
|
+
11. LIST MATCH:
|
|
281
|
+
User: "find status pending, approved, or completed"
|
|
282
|
+
Output: [{{"field": "status", "lookup": "in", "value": ["pending", "approved", "completed"]}}]
|
|
283
|
+
|
|
284
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
285
|
+
OUTPUT FORMAT REQUIREMENTS
|
|
286
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
287
|
+
|
|
288
|
+
CRITICAL RULES:
|
|
289
|
+
1. Return ONLY JSON - either a bare array of filter conditions or an object with a "filters" array - and no explanatory text
|
|
290
|
+
2. Each filter element must be a valid filter condition object
|
|
291
|
+
3. Use exact field names from the schema (case-sensitive)
|
|
292
|
+
4. Convert percentages to decimals for rate fields
|
|
293
|
+
5. Use ISO date format (YYYY-MM-DD) for date fields
|
|
294
|
+
6. If no filters are implied, return empty array: []
|
|
295
|
+
7. If you don't understand the query or foresee execution difficulties, ALWAYS return empty array: [] - do not attempt to create filters
|
|
296
|
+
|
|
297
|
+
VALID OUTPUT FORMATS:
|
|
298
|
+
✓ [{{"field": "price", "lookup": "eq", "value": 99.99}}]
|
|
299
|
+
✓ []
|
|
300
|
+
✓ [{{"field": "status", "lookup": "eq", "value": "active"}}, {{"field": "age", "lookup": "gt", "value": 18}}]
|
|
301
|
+
✓ {{"filters": [{{"field": "price", "lookup": "eq", "value": 99.99}}]}}
|
|
302
|
+
✓ {{"filters": []}}
|
|
303
|
+
✓ {{"filters": [{{"field": "price", "lookup": "gte", "value": 50}}, {{"field": "price", "lookup": "lte", "value": 200}}, {{"field": "category", "lookup": "icontains", "value": "electronics"}}, {{"field": "status", "lookup": "eq", "value": "active"}}]}}
|
|
304
|
+
|
|
305
|
+
INVALID OUTPUTS (will cause errors):
|
|
306
|
+
✗ [{{"field": "price", "value": 100}}] (missing "lookup")
|
|
307
|
+
✗ [{{"field": "Price", ...}}] (wrong case - field names are case-sensitive)
|
|
308
|
+
✗ [{{"field": "status", "lookup": "equals", ...}}] (invalid lookup - use "eq")
|
|
309
|
+
✗ [{{"field": "name", "lookup": "exact", ...}}] (invalid lookup - use "eq")
|
|
310
|
+
✗ [{{"field": "category", "lookup": "iexact", ...}}] (invalid - use "icontains" or "eq")
|
|
311
|
+
|
|
312
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
313
|
+
BEGIN PROCESSING
|
|
314
|
+
═══════════════════════════════════════════════════════════════════════════════
|
|
315
|
+
|
|
316
|
+
Analyze the user query below and return the JSON filter array.
|
|
317
|
+
---
|
|
318
|
+
{query}
|
|
@@ -4,11 +4,11 @@ TOOLS
|
|
|
4
4
|
------
|
|
5
5
|
{tools}
|
|
6
6
|
|
|
7
|
-
FORMAT (STRICT — EXACTLY ONE; NO extra lines, NO
|
|
7
|
+
FORMAT (STRICT — EXACTLY ONE; NO extra lines, NO wrapping codeblocks)
|
|
8
8
|
----------------------------------------------------------
|
|
9
9
|
1. If you do NOT need a tool:
|
|
10
10
|
Thought: Do I need to use a tool? No
|
|
11
|
-
Final Answer: <your final answer
|
|
11
|
+
Final Answer: <your final answer (can include Markdown tables)>
|
|
12
12
|
|
|
13
13
|
2. If you DO need a tool:
|
|
14
14
|
Thought: Do I need to use a tool? Yes
|
|
@@ -21,15 +21,18 @@ RULES
|
|
|
21
21
|
- `Action Input` MUST be a valid ONE-LINE JSON object (e.g. {{"a": 1, "b": 2}}).
|
|
22
22
|
- Do NOT add anything before/after the block.
|
|
23
23
|
- Do NOT print "Observation". The system will add it after tool execution.
|
|
24
|
-
- You CAN read and use any attached files provided by the platform
|
|
25
|
-
- Prefer not to use tools when the answer is fully supported by the user’s message and/or attached files.
|
|
24
|
+
- You CAN read and use any attached files provided by the platform.
|
|
25
|
+
- Prefer not to use tools when the answer is fully supported by the user’s message and/or attached files.
|
|
26
26
|
- If information is insufficient, answer concisely with: “I don’t have enough data to answer.”
|
|
27
|
-
- Be deterministic and concise. No preambles, no meta-commentary
|
|
28
|
-
- Numbers: preserve exact figures, units, and percentages
|
|
27
|
+
- Be deterministic and concise. No preambles, no meta-commentary.
|
|
28
|
+
- Numbers: preserve exact figures, units, and percentages.
|
|
29
29
|
- If you choose a tool:
|
|
30
30
|
- Use exactly one tool per step.
|
|
31
|
-
- The Action Input MUST be a single-line valid JSON object
|
|
32
|
-
|
|
31
|
+
- The Action Input MUST be a single-line valid JSON object.
|
|
32
|
+
|
|
33
|
+
CRITICAL FLOW RULES (MUST FOLLOW):
|
|
34
|
+
1. IMMEDIATE ANSWER: As soon as you receive an "Observation" containing data, you MUST provide a "Final Answer" in the very next step.
|
|
35
|
+
2. NO LOOPING: Do not say "Thought: I need to search again". Instead write "Thought: Do I need to use a tool? No" followed by "Final Answer: ..." on the next line, exactly as required by the FORMAT section.
|
|
33
36
|
|
|
34
37
|
PREVIOUS CONVERSATION
|
|
35
38
|
--------------------
|
|
@@ -43,4 +46,10 @@ SCRATCHPAD
|
|
|
43
46
|
----------
|
|
44
47
|
{agent_scratchpad}
|
|
45
48
|
|
|
49
|
+
!!! REMINDER !!!
|
|
50
|
+
If the Observation above contains the necessary information, STOP searching.
|
|
51
|
+
Your NEXT line MUST be:
|
|
52
|
+
Thought: Do I need to use a tool? No
|
|
53
|
+
Final Answer: <your answer>
|
|
54
|
+
|
|
46
55
|
Assistant:
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""Utilities for data cleaning, serialization, and table rendering."""
|
|
2
|
+
|
|
3
|
+
import inspect
|
|
4
|
+
from collections.abc import Callable
|
|
5
|
+
from datetime import date
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from amsdal_models.classes.model import Model
|
|
10
|
+
from typing_extensions import TypedDict
|
|
11
|
+
|
|
12
|
+
# Mapping of field name to a value that has been passed through clean_data.
CleanedRecord = dict[str, Any]

# Scalar cell text longer than this is shortened by clean_data (the result,
# including the trailing "...", is exactly this many characters).
MAX_CELL_LENGTH = 40
# Number of leading characters of a referenced object id that clean_data
# keeps before appending "...".
MAX_OBJECT_ID_LENGTH = 10
# Number of leading characters of a stringified dict that clean_data keeps
# before appending "...".
MAX_DICT_STR_LENGTH = 100
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Column(TypedDict):
|
|
20
|
+
"""TypedDict for defining table columns."""
|
|
21
|
+
field: str | Callable[[Any], str]
|
|
22
|
+
title: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
async def async_serialize_model(model: Model) -> dict[str, Any]:
    """
    Build a plain dictionary from a Model instance, resolving awaitables.

    Every name in ``model.__fields__`` is read with ``getattr``. An awaitable
    value is awaited first. A value that is itself a ``Model`` is replaced by
    its ``display_name`` (awaited as well when that is awaitable). Any other
    value is stored unchanged.

    Args:
        model: The Model instance to serialize.

    Returns:
        A dictionary mapping each field name to its resolved value.
    """

    async def _resolve(candidate: Any) -> Any:
        # Attributes may be awaitable rather than plain values.
        return await candidate if inspect.isawaitable(candidate) else candidate

    serialized: dict[str, Any] = {}
    for name in model.__fields__:
        attr = await _resolve(getattr(model, name))
        if isinstance(attr, Model):
            # A nested model is represented only by its display_name.
            attr = await _resolve(attr.display_name)
        serialized[name] = attr
    return serialized
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _escape_markdown_cell(text: str) -> str:
    """Flatten line breaks and escape pipes so *text* stays inside one table cell."""
    return text.replace('\n', ' ').replace('\r', '').replace('|', r'\|')


def clean_data(val: Any) -> Any:
    """
    Cleans data for serialization/display.

    Conversion rules, applied in order:

    * ``None`` becomes an empty string.
    * ``datetime`` is formatted as ``YYYY-MM-DDTHH:MM:SS``; ``date`` uses
      ``isoformat()``.
    * A list becomes its items joined with ``", "``.
    * A dict holding a ``ref`` dict becomes that reference's ``object_id``
      (``'Object'`` when the key is absent), shortened to
      ``MAX_OBJECT_ID_LENGTH`` characters plus ``...``.
    * Any other dict becomes ``str(val)``, shortened to
      ``MAX_DICT_STR_LENGTH`` characters plus ``...``.
    * Everything else becomes ``str(val)``, shortened to at most
      ``MAX_CELL_LENGTH`` characters including the trailing ``...``.

    All textual results except dates have newlines flattened and ``|``
    escaped, so that they cannot break a Markdown table row.

    Args:
        val: The value to clean.

    Returns:
        The cleaned value (a string for every branch above).
    """
    if val is None:
        return ""
    # datetime is a subclass of date, so it has to be tested first.
    if isinstance(val, datetime):
        return val.strftime('%Y-%m-%dT%H:%M:%S')
    if isinstance(val, date):
        return val.isoformat()
    if isinstance(val, list):
        # Escape each item: a raw "|" or newline would split the table row.
        return ", ".join(_escape_markdown_cell(str(item)) for item in val)
    if isinstance(val, dict):
        ref = val.get('ref')
        if isinstance(ref, dict):
            # object_id is not guaranteed to be a string; stringify it before
            # len()/slicing so non-string ids do not raise TypeError.
            obj_id = str(ref.get('object_id', 'Object'))
            if len(obj_id) > MAX_OBJECT_ID_LENGTH:
                obj_id = obj_id[:MAX_OBJECT_ID_LENGTH] + '...'
            return _escape_markdown_cell(obj_id)
        dict_text = str(val)
        if len(dict_text) > MAX_DICT_STR_LENGTH:
            dict_text = dict_text[:MAX_DICT_STR_LENGTH] + '...'
        # Truncate on the raw text first, then escape, so the cut can never
        # land between a backslash and the pipe it escapes.
        return _escape_markdown_cell(dict_text)

    text = _escape_markdown_cell(str(val))
    return text[: MAX_CELL_LENGTH - 3] + "..." if len(text) > MAX_CELL_LENGTH else text
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
async def serialize_and_clean_record(
    record: Model | dict[str, Any],
) -> CleanedRecord:
    """
    Turn a Model or a plain dict into a dict of display-ready values.

    A Model is first converted with ``async_serialize_model``; a dict is used
    as it is. Each value is then passed through ``clean_data``. The input
    dict is not modified; a new dict is returned.

    Args:
        record: The Model instance or dictionary to serialize and clean.

    Returns:
        A dictionary with cleaned field values.

    Raises:
        ValueError: If ``record`` is neither a Model nor a dict.
    """
    # Reject unsupported inputs up front.
    if not isinstance(record, (Model, dict)):
        msg = f"Unsupported record type: {type(record)}"
        raise ValueError(msg)

    raw = await async_serialize_model(record) if isinstance(record, Model) else record

    cleaned: CleanedRecord = {}
    for key, value in raw.items():
        cleaned[key] = clean_data(value)
    return cleaned
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def render_markdown_table(
    records: list[CleanedRecord],
    columns: list[Column] | None = None,
    fields: list[str] | None = None,
) -> str:
    """
    Renders a list of cleaned records as a Markdown table.

    Column selection:

    * ``columns`` given: used as is (``fields`` is ignored).
    * otherwise ``fields`` given and non-empty: one column per field name.
    * otherwise: one column per key of the first record.

    Derived titles are the field name with underscores replaced by spaces,
    in title case. A column whose ``field`` is callable gets its cell value
    from ``field(record)``; otherwise ``record.get(field, '')`` is used.
    Cells are padded so the pipes line up.

    Columns are handled by position, so columns that share a title are all
    rendered instead of overwriting each other.

    Args:
        records: List of cleaned records to render.
        columns: Optional list of column definitions with field and title.
        fields: Optional list of field names to include if columns not provided.

    Returns:
        A string containing the Markdown table, ``"No records found."`` when
        ``records`` is empty, or ``"No columns to display."`` when there are
        no columns.
    """
    if not records:
        return "No records found."

    if columns is None:
        # Explicit field names win; otherwise mirror the first record's keys.
        names = fields or list(records[0].keys())
        columns = [
            {"field": name, "title": name.replace('_', ' ').title()}
            for name in names
        ]

    if not columns:
        return "No columns to display."

    titles = [col["title"] for col in columns]

    # Rows are positional lists rather than dicts keyed by title: a dict
    # would silently drop every column whose title repeats an earlier one.
    rows: list[list[str]] = []
    for record in records:
        cells: list[str] = []
        for col in columns:
            field = col["field"]
            value = field(record) if callable(field) else record.get(field, '')
            cells.append(str(value))
        rows.append(cells)

    # Each column is as wide as its longest cell or title. The floor of 1
    # keeps at least one dash in every delimiter cell.
    widths = [
        max(1, len(title), *(len(row[index]) for row in rows))
        for index, title in enumerate(titles)
    ]

    def _format_line(cells: list[str]) -> str:
        padded = (cell.ljust(width) for cell, width in zip(cells, widths))
        return "| " + " | ".join(padded) + " |"

    table_lines = [_format_line(titles), _format_line(["-" * width for width in widths])]
    table_lines.extend(_format_line(row) for row in rows)
    return '\n'.join(table_lines)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: amsdal_ml
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: amsdal_ml plugin for AMSDAL Framework
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Requires-Dist: aiohttp==3.12.15
|
|
@@ -13,6 +13,7 @@ Requires-Dist: mcp>=0.1
|
|
|
13
13
|
Requires-Dist: openai==1.100.2
|
|
14
14
|
Requires-Dist: pydantic-settings==2.10.1
|
|
15
15
|
Requires-Dist: pydantic==2.11.7
|
|
16
|
+
Requires-Dist: pymupdf>=1.24.10
|
|
16
17
|
Description-Content-Type: text/markdown
|
|
17
18
|
|
|
18
19
|
# AMSDAL ML
|
|
@@ -130,6 +131,53 @@ async for chunk in agent.astream('What is semantic search?'):
|
|
|
130
131
|
print(chunk, end='', flush=True)
|
|
131
132
|
```
|
|
132
133
|
|
|
134
|
+
### 5. Functional Calling Agent with Python Tools
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from amsdal_ml.agents.functional_calling_agent import FunctionalCallingAgent
|
|
138
|
+
from amsdal_ml.agents.python_tool import PythonTool
|
|
139
|
+
from amsdal_ml.ml_models.openai_model import OpenAIModel
|
|
140
|
+
|
|
141
|
+
llm = OpenAIModel()
|
|
142
|
+
agent = FunctionalCallingAgent(model=llm, tools=[search_tool, render_tool])
|
|
143
|
+
result = await agent.arun(user_query="Find products with price > 100", history=[])
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### 6. Natural Language Query Retriever
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from amsdal_ml.ml_retrievers.query_retriever import NLQueryRetriever
|
|
150
|
+
|
|
151
|
+
retriever = NLQueryRetriever(llm=llm, queryset=Product.objects.all())
|
|
152
|
+
documents = await retriever.invoke("Show me red products", limit=10)
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### 7. Document Ingestion Pipeline
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
from amsdal_ml.ml_ingesting import ModelIngester
|
|
159
|
+
from amsdal_ml.ml_ingesting.pipeline import DefaultIngestionPipeline
|
|
160
|
+
from amsdal_ml.ml_ingesting.loaders.pdf_loader import PdfLoader
|
|
161
|
+
from amsdal_ml.ml_ingesting.processors.text_cleaner import TextCleaner
|
|
162
|
+
from amsdal_ml.ml_ingesting.splitters.token_splitter import TokenSplitter
|
|
163
|
+
from amsdal_ml.ml_ingesting.embedders.openai_embedder import OpenAIEmbedder
|
|
164
|
+
from amsdal_ml.ml_ingesting.stores.embedding_data import EmbeddingDataStore
|
|
165
|
+
|
|
166
|
+
pipeline = DefaultIngestionPipeline(
|
|
167
|
+
loader=PdfLoader(), # Uses pymupdf for PDF processing
|
|
168
|
+
cleaner=TextCleaner(),
|
|
169
|
+
splitter=TokenSplitter(max_tokens=800, overlap_tokens=80),
|
|
170
|
+
embedder=OpenAIEmbedder(),
|
|
171
|
+
store=EmbeddingDataStore(),
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
ingester = ModelIngester(
|
|
175
|
+
pipeline=pipeline,
|
|
176
|
+
base_tags=["document"],
|
|
177
|
+
base_metadata={"source": "pdf"},
|
|
178
|
+
)
|
|
179
|
+
```
|
|
180
|
+
|
|
133
181
|
## Architecture
|
|
134
182
|
|
|
135
183
|
### Core Components
|
|
@@ -139,6 +187,16 @@ async for chunk in agent.astream('What is semantic search?'):
|
|
|
139
187
|
- **`MLRetriever`**: Semantic similarity search with tag-based filtering
|
|
140
188
|
- **`Agent`**: Q&A and task-oriented agents with streaming and citations
|
|
141
189
|
- **`EmbeddingModel`**: Database model storing 1536-dimensional vectors linked to source objects
|
|
190
|
+
- **`PythonTool`**: Tool for executing Python functions within agents
|
|
191
|
+
- **`FunctionalCallingAgent`**: Agent specialized in functional calling with configurable tools
|
|
192
|
+
- **`NLQueryRetriever`**: Retriever for natural language queries on AMSDAL querysets
|
|
193
|
+
- **`DefaultIngestionPipeline`**: Pipeline for document ingestion including loader, cleaner, splitter, embedder, and store
|
|
194
|
+
- **`ModelIngester`**: High-level ingester for processing models with customizable pipelines and metadata
|
|
195
|
+
- **`PdfLoader`**: Document loader using pymupdf for PDF processing
|
|
196
|
+
- **`TextCleaner`**: Processor for cleaning and normalizing text
|
|
197
|
+
- **`TokenSplitter`**: Splitter for dividing text into chunks based on token count
|
|
198
|
+
- **`OpenAIEmbedder`**: Embedder for generating embeddings via OpenAI API
|
|
199
|
+
- **`EmbeddingDataStore`**: Store for saving embedding data linked to source objects
|
|
142
200
|
- **MCP Server/Client**: Expose retrievers as tools or consume external MCP services
|
|
143
201
|
|
|
144
202
|
### Configuration
|