orchestrator-core 4.5.0a2__py3-none-any.whl → 4.5.0a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. orchestrator/__init__.py +1 -1
  2. orchestrator/api/api_v1/endpoints/search.py +13 -0
  3. orchestrator/devtools/populator.py +16 -0
  4. orchestrator/log_config.py +1 -0
  5. orchestrator/migrations/helpers.py +1 -1
  6. orchestrator/schemas/search.py +13 -0
  7. orchestrator/schemas/workflow.py +1 -0
  8. orchestrator/search/agent/__init__.py +13 -0
  9. orchestrator/search/agent/agent.py +13 -0
  10. orchestrator/search/agent/prompts.py +13 -0
  11. orchestrator/search/agent/state.py +13 -0
  12. orchestrator/search/agent/tools.py +27 -5
  13. orchestrator/search/core/__init__.py +12 -0
  14. orchestrator/search/core/embedding.py +13 -4
  15. orchestrator/search/core/exceptions.py +14 -0
  16. orchestrator/search/core/types.py +15 -0
  17. orchestrator/search/core/validators.py +13 -0
  18. orchestrator/search/filters/__init__.py +13 -0
  19. orchestrator/search/filters/base.py +23 -18
  20. orchestrator/search/filters/date_filters.py +13 -0
  21. orchestrator/search/filters/definitions.py +16 -2
  22. orchestrator/search/filters/ltree_filters.py +16 -3
  23. orchestrator/search/filters/numeric_filter.py +13 -0
  24. orchestrator/search/indexing/__init__.py +13 -0
  25. orchestrator/search/indexing/indexer.py +13 -0
  26. orchestrator/search/indexing/registry.py +13 -0
  27. orchestrator/search/indexing/tasks.py +13 -0
  28. orchestrator/search/indexing/traverse.py +17 -5
  29. orchestrator/search/retrieval/__init__.py +13 -0
  30. orchestrator/search/retrieval/builder.py +17 -7
  31. orchestrator/search/retrieval/engine.py +35 -29
  32. orchestrator/search/retrieval/exceptions.py +90 -0
  33. orchestrator/search/retrieval/pagination.py +13 -0
  34. orchestrator/search/retrieval/retrievers/__init__.py +26 -0
  35. orchestrator/search/retrieval/retrievers/base.py +122 -0
  36. orchestrator/search/retrieval/retrievers/fuzzy.py +94 -0
  37. orchestrator/search/retrieval/retrievers/hybrid.py +188 -0
  38. orchestrator/search/retrieval/retrievers/semantic.py +94 -0
  39. orchestrator/search/retrieval/retrievers/structured.py +39 -0
  40. orchestrator/search/retrieval/utils.py +21 -7
  41. orchestrator/search/retrieval/validation.py +54 -76
  42. orchestrator/search/schemas/__init__.py +12 -0
  43. orchestrator/search/schemas/parameters.py +13 -0
  44. orchestrator/search/schemas/results.py +14 -1
  45. orchestrator/workflows/tasks/validate_products.py +1 -1
  46. {orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/METADATA +2 -2
  47. {orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/RECORD +49 -43
  48. orchestrator/search/retrieval/retriever.py +0 -447
  49. {orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/WHEEL +0 -0
  50. {orchestrator_core-4.5.0a2.dist-info → orchestrator_core-4.5.0a3.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,9 @@
1
- orchestrator/__init__.py,sha256=xKpOlVZwRPcmklRABJcL_1Kwp63ENR1Z5K7RGckwtVs,1732
1
+ orchestrator/__init__.py,sha256=2qjP04OyiwpdXVkj5X7tBnpTrC4iWMELv2zmJe2WZBs,1732
2
2
  orchestrator/agentic_app.py,sha256=bBMuH9Ub42nb8oFG0U00SzW_uQqnAayUX2tNs6yz1BM,2810
3
3
  orchestrator/app.py,sha256=UPKQuDpg8MWNC6r3SRRbp6l9RBzwb00IMIaGRk-jbCU,13203
4
4
  orchestrator/exception_handlers.py,sha256=UsW3dw8q0QQlNLcV359bIotah8DYjMsj2Ts1LfX4ClY,1268
5
5
  orchestrator/llm_settings.py,sha256=PJ3vf5aEugVigHFU7iw9haQon_bC7Y268GTFhfFaQHs,2075
6
- orchestrator/log_config.py,sha256=1tPRX5q65e57a6a_zEii_PFK8SzWT0mnA5w2sKg4hh8,1853
6
+ orchestrator/log_config.py,sha256=1cPl_OXT4tEUyNxG8cwIWXrmadUm1E81vq0mdtrV-v4,1912
7
7
  orchestrator/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
8
  orchestrator/security.py,sha256=iXFxGxab54aav7oHEKLAVkTgrQMJGHy6IYLojEnD7gI,2422
9
9
  orchestrator/settings.py,sha256=30iYKd_wNtjIO12DZ4LrH9w9OJgtmQ2AFEOSnrVTsRg,4365
@@ -23,7 +23,7 @@ orchestrator/api/api_v1/endpoints/processes.py,sha256=238Bydgj4ILNyMU_7j_Q7a0WGl
23
23
  orchestrator/api/api_v1/endpoints/product_blocks.py,sha256=kZ6ywIOsS_S2qGq7RvZ4KzjvaS1LmwbGWR37AKRvWOw,2146
24
24
  orchestrator/api/api_v1/endpoints/products.py,sha256=BfFtwu9dZXEQbtKxYj9icc73GKGvAGMR5ytyf41nQlQ,3081
25
25
  orchestrator/api/api_v1/endpoints/resource_types.py,sha256=gGyuaDyOD0TAVoeFGaGmjDGnQ8eQQArOxKrrk4MaDzA,2145
26
- orchestrator/api/api_v1/endpoints/search.py,sha256=QFxnMFQ2HgpL9Ebdc-vta6Z7Rdq5Qb9OKxyiPy2Lu9o,10200
26
+ orchestrator/api/api_v1/endpoints/search.py,sha256=sxiS2n1I0ghfQifo8Vf4PsAoxxGjKKqPTtHcAiuWrPE,10780
27
27
  orchestrator/api/api_v1/endpoints/settings.py,sha256=5s-k169podZjgGHUbVDmSQwpY_3Cs_Bbf2PPtZIkBcw,6184
28
28
  orchestrator/api/api_v1/endpoints/subscription_customer_descriptions.py,sha256=1_6LtgQleoq3M6z_W-Qz__Bj3OFUweoPrUqHMwSH6AM,3288
29
29
  orchestrator/api/api_v1/endpoints/subscriptions.py,sha256=7KaodccUiMkcVnrFnK2azp_V_-hGudcIyhov5WwVGQY,9810
@@ -138,7 +138,7 @@ orchestrator/db/sorting/sorting.py,sha256=WpwImCDRKiOp4Tr54vovWpHkoJIov8SNQNPods
138
138
  orchestrator/db/sorting/subscription.py,sha256=uepBMyfRFLZz5yoYK4VK3mdRBvO1Gc-6jSQXQ41fR-8,1441
139
139
  orchestrator/db/sorting/workflow.py,sha256=6-JceMyB99M994Re58E0MX5uhlpnTW5OJCxmXopEfRU,576
140
140
  orchestrator/devtools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
141
- orchestrator/devtools/populator.py,sha256=-8i3KDDP1cRgwiDKuYmomwrSlbmcMhpAaEaDvhyIbk4,19688
141
+ orchestrator/devtools/populator.py,sha256=U7j5Gvu5mU8kvqx9jfno25aYyD5GFSk9ZQ2zYSagQOI,20399
142
142
  orchestrator/devtools/scripts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
143
143
  orchestrator/devtools/scripts/migrate_20.py,sha256=8-qLiWfjYctu2kNl5MHtZvfeCdUs0YzRjepy4VYOUkc,4891
144
144
  orchestrator/devtools/scripts/migrate_30.py,sha256=pRnJQFvmliwTLgbbDSUGyS9sCWqQcTms-g_3yfUO5vQ,3030
@@ -220,7 +220,7 @@ orchestrator/migrations/README,sha256=heMzebYwlGhnE8_4CWJ4LS74WoEZjBy-S-mIJRxAEK
220
220
  orchestrator/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
221
  orchestrator/migrations/alembic.ini,sha256=kMoADqhGeubU8xanILNaqm4oixLy9m4ngYtdGpZcc7I,873
222
222
  orchestrator/migrations/env.py,sha256=M_cPoAL2axuuup5fvMy8I_WTPHEw0RbPEHkhZ3QEGoE,3740
223
- orchestrator/migrations/helpers.py,sha256=CAGGKhxpmhyKGfYcO-SUCPfMTOCZPfEpkJrcm2MYfcE,47979
223
+ orchestrator/migrations/helpers.py,sha256=g3G8P5IQ1OI4kmlNP8L0v9nzWvekzXwilXLdEvbN9ZI,47992
224
224
  orchestrator/migrations/script.py.mako,sha256=607Zrgp-Z-m9WGLt4wewN1QDOmHeifxcePUdADkSZyM,510
225
225
  orchestrator/migrations/templates/alembic.ini.j2,sha256=8v7UbKvOiWEbEKQa-Au3uONKUuYx6aflulYanZX6r2I,883
226
226
  orchestrator/migrations/templates/env.py.j2,sha256=LIt0ildZTZvNEx3imhy4GNzfFi_rPZg-8H7rGgrBOP8,2717
@@ -271,44 +271,50 @@ orchestrator/schemas/process.py,sha256=UACBNt-4g4v9Y528u-gZ-Wk7YxwJHhnI4cEu5CtQm
271
271
  orchestrator/schemas/product.py,sha256=MhMCh058ZuS2RJq-wSmxIPUNlhQexxXIx3DSz2OmOh4,1570
272
272
  orchestrator/schemas/product_block.py,sha256=kCqvm6qadHpegMr9aWI_fYX-T7mS-5S-ldPxnGQZg7M,1519
273
273
  orchestrator/schemas/resource_type.py,sha256=VDju4XywcDDLxdpbWU62RTvR9QF8x_GRrpTlN_NE8uI,1064
274
- orchestrator/schemas/search.py,sha256=yOlkG61BxSTL5xvepxrG-Qz_NceSw5E0g-7GUkjaj9Q,2837
274
+ orchestrator/schemas/search.py,sha256=Q89GAPrmHf2DnwTJiPMYog1xAIC3QMJ3IItFZdVVFXg,3417
275
275
  orchestrator/schemas/subscription.py,sha256=-jXyHZIed9Xlia18ksSDyenblNN6Q2yM2FlGELyJ458,3423
276
276
  orchestrator/schemas/subscription_descriptions.py,sha256=Ft_jw1U0bf9Z0U8O4OWfLlcl0mXCVT_qYVagBP3GbIQ,1262
277
- orchestrator/schemas/workflow.py,sha256=VqQ9XfV4fVd6MjY0LRRQzWBJHmlPsAamWfTwDx1cZkg,2102
277
+ orchestrator/schemas/workflow.py,sha256=StVoRGyNT2iIeq3z8BIlTPt0bcafzbeYxXRrCucR6LU,2146
278
278
  orchestrator/search/__init__.py,sha256=2uhTQexKx-cdBP1retV3CYSNCs02s8WL3fhGvupRGZk,571
279
- orchestrator/search/agent/__init__.py,sha256=ucZF-4ZsDc911Zyjmc1OK2ZcA6C64GFdMOVP26sWW7Q,166
280
- orchestrator/search/agent/agent.py,sha256=6wgsoOkGay_Qnaz8GJNrhKkA50ijotKTnj9ivXvswZg,1740
281
- orchestrator/search/agent/prompts.py,sha256=XdS97esjAJacNHtaeYATO-4JXVXnHibAd09BCtlvzBI,3821
282
- orchestrator/search/agent/state.py,sha256=DPCbvp6_WCxXwuJq1IU9glfKNZ1mRODoFcLjb9AOke0,203
283
- orchestrator/search/agent/tools.py,sha256=n5fdVgbvadWjgNN2MZwl5WyeLCKNS6Az_x4b9L6qljs,8382
284
- orchestrator/search/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
- orchestrator/search/core/embedding.py,sha256=s7P_2hrofnRXnfUFKuDJcpYkm_KzFkmhFe5j2n8_W7U,2297
286
- orchestrator/search/core/exceptions.py,sha256=uIlePbEyShcoM5uSCGcOlI-WEfEae3uBECGT4p40eaA,448
287
- orchestrator/search/core/types.py,sha256=Zm3NgoKwmvu3wTRDUrQ8Wn5l05r2hOXh-OkB-uGzLNs,8228
288
- orchestrator/search/core/validators.py,sha256=Ny80tH3SHuM64yZCi-9kfX-66NKGjsp_0oG7uJ21JVk,624
279
+ orchestrator/search/agent/__init__.py,sha256=guIh2VaXHvR8wkdOJMhoFmSQCLju4qTXyZDuLc12zrA,746
280
+ orchestrator/search/agent/agent.py,sha256=iejwwBl4YPDVpYIBy9NzNdOSW7xbOUZedfC_7iGHpSg,2320
281
+ orchestrator/search/agent/prompts.py,sha256=2VS0Ho-jq9u2kqrlcCIAHRqs35eww1eC0tZXPlhTbaE,4401
282
+ orchestrator/search/agent/state.py,sha256=1WHYol5UlYpq2QZz-BVsBFYrJZms5P18ohN2Ur8P2F4,783
283
+ orchestrator/search/agent/tools.py,sha256=4kvY0tG7i5-w8C-ZMuSabxb_sJmd_TpFl3F4xeGgzok,9513
284
+ orchestrator/search/core/__init__.py,sha256=q5G0z3nKjIHKFs1PkEG3nvTUy3Wp4kCyBtCbqUITj3A,579
285
+ orchestrator/search/core/embedding.py,sha256=ESeI5Vcobb__CRRZE_RP-m4eAz8JUP8S16aGLJh4uAY,2751
286
+ orchestrator/search/core/exceptions.py,sha256=qp7ZdyDvN5b2HD5_oZXMgoLJgy79krpClszKh3KPuAw,1029
287
+ orchestrator/search/core/types.py,sha256=Gaf77cKUqnE8vJNCpk-g3h2U5912axhIgZZnF_0_O48,8831
288
+ orchestrator/search/core/validators.py,sha256=zktY5A3RTBmfdARJoxoz9rnnyTZj7L30Kbmh9UTQz2o,1204
289
289
  orchestrator/search/docs/index.md,sha256=zKzE2fbtHDfYTKaHg628wAsqCTOJ5yFUWV0ucFH3pAg,863
290
290
  orchestrator/search/docs/running_local_text_embedding_inference.md,sha256=KlFxyAjHfLyCeV9fXAFVUqZOFWYwGPH-_oBjWx2Vgng,1255
291
- orchestrator/search/filters/__init__.py,sha256=h9wjnKLcIfG1TwiuwtnlDvv9XMWLxkjCBD9D8qCOoQU,642
292
- orchestrator/search/filters/base.py,sha256=E6fXcYEdFOAel90yHo41lVGplbCqUsPvPiMJbCO5FCk,11152
293
- orchestrator/search/filters/date_filters.py,sha256=dDbTPI-badVnaKM404waQ3yzTOHJNn59kYoqHvW3XFE,2460
294
- orchestrator/search/filters/definitions.py,sha256=oIwW8dWz7HuRkEvCbCfj2WOOdE_PKh0b5n8Re5x_lS0,3455
295
- orchestrator/search/filters/ltree_filters.py,sha256=kyMmm1EYKYVUwPK5p9tyL-da0SrCe6LPmFW56_6y0uY,1696
296
- orchestrator/search/filters/numeric_filter.py,sha256=GPBcZgrip2ruxsBx2AHZqxS16zkQG3C6zLJAGC2s2VU,2194
297
- orchestrator/search/indexing/__init__.py,sha256=7IVylH0S5FPUh6jb9H9vNLb61gQQIk_sNrSHc8WoSD0,82
298
- orchestrator/search/indexing/indexer.py,sha256=9l4bXwNAfsjMrrzit601solAl6W07Pyj9-SRldmZjGU,14391
299
- orchestrator/search/indexing/registry.py,sha256=cSeZe6aq3XME-RRz4AMD8BHXzx7dvU6tBa05ecjTzfk,2468
300
- orchestrator/search/indexing/tasks.py,sha256=k7GihQLov8FMhzYM_6f-IlWPMuP1w2QZpfOyloppKFM,1783
301
- orchestrator/search/indexing/traverse.py,sha256=_lWbUHy1S5-oDyo-4dDeivb9KTn6VUq_T_KXxFm_A2Y,13775
302
- orchestrator/search/retrieval/__init__.py,sha256=EixZVzUzP3FOC-hWwf3pqvz0XHZe1HyHBsmJlTEU0Cw,65
303
- orchestrator/search/retrieval/builder.py,sha256=homctoLPN1wnbsraJWEX82zyL3vANPymhtRAcrnF0qk,4064
304
- orchestrator/search/retrieval/engine.py,sha256=yJn65Nv-HIRJ6yLGwgRxdNAdsXmWvoLUK2WB9PRF1fg,5554
305
- orchestrator/search/retrieval/pagination.py,sha256=-j-vtdPsmUlKCuN7ffwMzEeWG7vLKlJ2NDmRGAGahxM,2773
306
- orchestrator/search/retrieval/retriever.py,sha256=140SMmBnCGhckPC9nZbe0T-DFPpQBNd6w-_3mY0-_Vs,17465
307
- orchestrator/search/retrieval/utils.py,sha256=BhrCqSO5fDlDEzqS81dR-Lpd52JgbnR3YS4h9TXx4Bs,3862
308
- orchestrator/search/retrieval/validation.py,sha256=KvPjvnl67mq2iMbjBc3YLMZ_XMiK3AygRDxrWKAPP_Y,5829
309
- orchestrator/search/schemas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
310
- orchestrator/search/schemas/parameters.py,sha256=X74WzGO6tmiQ9XAQ2GdgIpqt3KzzqvaByrfB_tdWEX4,4110
311
- orchestrator/search/schemas/results.py,sha256=NZtiycuJ54DSMV_pfo4K3heuNgSqaHRqqDYe5GSKqrI,1406
291
+ orchestrator/search/filters/__init__.py,sha256=Yutr21lv8RtZf5OKaBozlYufgmmV2QHuzAPPjvUamLE,1222
292
+ orchestrator/search/filters/base.py,sha256=sk9m65M_viJV08YpBUm8CrLwmb-o9GcqAMXztqVAVtU,11612
293
+ orchestrator/search/filters/date_filters.py,sha256=0a6nbUTK647_Qf4XXZMLDvBLVjF5Qqy9eJ-9SrTGaGg,3040
294
+ orchestrator/search/filters/definitions.py,sha256=wl2HiXlTWXQN4JmuSq2SBuhTMvyIeonTtUZoCrJAK6M,4093
295
+ orchestrator/search/filters/ltree_filters.py,sha256=1OOmM5K90NsGBQmTqyoDlphdAOGd9r2rmz1rNItm8yk,2341
296
+ orchestrator/search/filters/numeric_filter.py,sha256=lcOAOpPNTwA0SW8QPiMOs1oKTYZLwGDQSrwFydXgMUU,2774
297
+ orchestrator/search/indexing/__init__.py,sha256=Or78bizNPiuNOgwLGJQ0mspCF1G_gSe5C9Ap7qi0MZk,662
298
+ orchestrator/search/indexing/indexer.py,sha256=Iw9BZWWpMPdhMffMwpZt1QrACSmmMVFg29R04zYuQgA,14971
299
+ orchestrator/search/indexing/registry.py,sha256=zEOUmQDmZHJ4xzT63VSJzuuHWVTnuBSvhZg4l6lFTUU,3048
300
+ orchestrator/search/indexing/tasks.py,sha256=UKG5EKE_QZGrYMSuVZB8UWw14U3f6enanTzQVbxS3Is,2363
301
+ orchestrator/search/indexing/traverse.py,sha256=NKkKSri-if1d1vwzTQlDCF0hvBdB2IbWWuMdPrQ78Jg,14330
302
+ orchestrator/search/retrieval/__init__.py,sha256=JP5WGYhmjd2RKXEExorvU6koMBLsTLdlDGCR_r1t8ug,645
303
+ orchestrator/search/retrieval/builder.py,sha256=70cEvbsWI1dj-4H-LJq4o6Q71e3WERd-V6bzlZhGtHw,4607
304
+ orchestrator/search/retrieval/engine.py,sha256=b7qEw-DA-JEnOTXNm9baKNqLFpNAy9RHQF9s4oEFzvo,6312
305
+ orchestrator/search/retrieval/exceptions.py,sha256=oHoLGLLxxmVcV-W36uK0V-Pn4vf_iw6hajpQbap3NqI,3588
306
+ orchestrator/search/retrieval/pagination.py,sha256=bRcXtWxxWvOhCQyhjwfJ7S6q_Dn3pYm8TCg7ofjVP44,3353
307
+ orchestrator/search/retrieval/utils.py,sha256=svhF9YfMClq2MVPArS3ir3pg5_e_bremquv_l6tTsOQ,4597
308
+ orchestrator/search/retrieval/validation.py,sha256=AjhttVJWlZDaT1_pUL_LaypQV11U21JpTCE4OwnpoqA,5849
309
+ orchestrator/search/retrieval/retrievers/__init__.py,sha256=1bGmbae0GYRM6e1vxf0ww79NaTSmfOMe9S0pPVmh3CM,897
310
+ orchestrator/search/retrieval/retrievers/base.py,sha256=zg1Sbz_YKBXIfyn69tiTrW8quFvwmeFGNNMU8QQvHfU,4525
311
+ orchestrator/search/retrieval/retrievers/fuzzy.py,sha256=U_WNAaxSUVUlVrmFrYFt-s0ebw9ift1Z2zBHG8TSPLE,3839
312
+ orchestrator/search/retrieval/retrievers/hybrid.py,sha256=iQ977OF7GeAv-Pv2eALAxz-KwOL6EUmooSVnHZl5Dsw,7628
313
+ orchestrator/search/retrieval/retrievers/semantic.py,sha256=oWNJ9DuqM16BXYXUwmRmkfDmp_2vQH2PySNMk8TcvVk,3961
314
+ orchestrator/search/retrieval/retrievers/structured.py,sha256=OHsHEjjLg1QwtEytQNeyWcCBQd8rJxHVf59HxvA9_vc,1452
315
+ orchestrator/search/schemas/__init__.py,sha256=q5G0z3nKjIHKFs1PkEG3nvTUy3Wp4kCyBtCbqUITj3A,579
316
+ orchestrator/search/schemas/parameters.py,sha256=aglbVvvM_gT-zTpVQh05wIUnfn2mD1JKIiWH_VaTqaM,4690
317
+ orchestrator/search/schemas/results.py,sha256=EsbYS8XJ8r5JoN17N4z1lHIShgg7RW973mi6yILcHOI,1987
312
318
  orchestrator/services/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjMZn97n8XzmLU,571
313
319
  orchestrator/services/fixed_inputs.py,sha256=kyz7s2HLzyDulvcq-ZqefTw1om86COvyvTjz0_5CmgI,876
314
320
  orchestrator/services/input_state.py,sha256=6BZOpb3cHpO18K-XG-3QUIV9pIM25_ufdODrp5CmXG4,2390
@@ -360,9 +366,9 @@ orchestrator/workflows/tasks/__init__.py,sha256=GyHNfEFCGKQwRiN6rQmvSRH2iYX7npjM
360
366
  orchestrator/workflows/tasks/cleanup_tasks_log.py,sha256=BfWYbPXhnLAHUJ0mlODDnjZnQQAvKCZJDVTwbwOWI04,1624
361
367
  orchestrator/workflows/tasks/resume_workflows.py,sha256=T3iobSJjVgiupe0rClD34kUZ7KF4pL5yK2AVeRLZog8,4313
362
368
  orchestrator/workflows/tasks/validate_product_type.py,sha256=paG-NAY1bdde3Adt8zItkcBKf5Pxw6f5ngGW6an6dYU,3192
363
- orchestrator/workflows/tasks/validate_products.py,sha256=GZJBoFF-WMphS7ghMs2-gqvV2iL1F0POhk0uSNt93n0,8510
369
+ orchestrator/workflows/tasks/validate_products.py,sha256=kXBGZTkobfYH8e_crhdErT-ypdouH0a3_WLImmbKXcE,8523
364
370
  orchestrator/workflows/translations/en-GB.json,sha256=ST53HxkphFLTMjFHonykDBOZ7-P_KxksktZU3GbxLt0,846
365
- orchestrator_core-4.5.0a2.dist-info/licenses/LICENSE,sha256=b-aA5OZQuuBATmLKo_mln8CQrDPPhg3ghLzjPjLn4Tg,11409
366
- orchestrator_core-4.5.0a2.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
367
- orchestrator_core-4.5.0a2.dist-info/METADATA,sha256=fxKSeq4fVKEvTapkag42vtmn2h1HegxEqXJPHEHDiCc,6177
368
- orchestrator_core-4.5.0a2.dist-info/RECORD,,
371
+ orchestrator_core-4.5.0a3.dist-info/licenses/LICENSE,sha256=b-aA5OZQuuBATmLKo_mln8CQrDPPhg3ghLzjPjLn4Tg,11409
372
+ orchestrator_core-4.5.0a3.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
373
+ orchestrator_core-4.5.0a3.dist-info/METADATA,sha256=m1FaoxcMxdfPSE_HwuQ0QSJc432amisFx_jE82A6lPI,6169
374
+ orchestrator_core-4.5.0a3.dist-info/RECORD,,
@@ -1,447 +0,0 @@
1
- from abc import ABC, abstractmethod
2
- from decimal import Decimal
3
-
4
- import structlog
5
- from sqlalchemy import BindParameter, Numeric, Select, and_, bindparam, case, cast, func, literal, or_, select
6
- from sqlalchemy.sql.expression import ColumnElement
7
-
8
- from orchestrator.db.models import AiSearchIndex
9
- from orchestrator.search.core.types import FieldType, SearchMetadata
10
- from orchestrator.search.schemas.parameters import BaseSearchParameters
11
-
12
- from .pagination import PaginationParams
13
-
14
- logger = structlog.get_logger(__name__)
15
-
16
-
17
class Retriever(ABC):
    """Abstract base class for applying a ranking strategy to a search query.

    Subclasses implement :meth:`apply` (turn a candidate query into a ranked
    query) and :attr:`metadata` (describe the strategy). :meth:`from_params`
    selects the concrete subclass that matches the search parameters.
    """

    # Number of decimal places scores are rounded to. The NUMERIC scale and the
    # pagination quantum are both derived from this value so they stay in sync.
    SCORE_PRECISION = 12
    SCORE_NUMERIC_TYPE = Numeric(38, SCORE_PRECISION)
    HIGHLIGHT_TEXT_LABEL = "highlight_text"
    HIGHLIGHT_PATH_LABEL = "highlight_path"
    SCORE_LABEL = "score"
    # Index value types that the text-based retrievers are allowed to match on.
    SEARCHABLE_FIELD_TYPES = [
        FieldType.STRING.value,
        FieldType.UUID.value,
        FieldType.BLOCK.value,
        FieldType.RESOURCE_TYPE.value,
    ]

    @classmethod
    async def from_params(
        cls,
        params: BaseSearchParameters,
        pagination_params: PaginationParams,
    ) -> "Retriever":
        """Create the appropriate retriever instance from search parameters.

        Parameters
        ----------
        params : BaseSearchParameters
            Search parameters; ``vector_query``, ``fuzzy_term`` and ``query``
            are read here.
        pagination_params : PaginationParams
            Pagination parameters; ``q_vec_override`` is read here and the
            object is handed to the selected retriever.

        Returns
        -------
        Retriever
            A concrete retriever instance (semantic, fuzzy, hybrid, or structured).
        """
        q_vec = await cls._get_query_vector(params.vector_query, pagination_params.q_vec_override)

        # If semantic search was requested but no vector could be obtained,
        # fall back to fuzzy matching on the full query text.
        fuzzy_term = params.fuzzy_term
        semantic_failed = q_vec is None and params.vector_query is not None
        if semantic_failed and params.query is not None:
            fuzzy_term = params.query

        if q_vec is not None and fuzzy_term is not None:
            return RrfHybridRetriever(q_vec, fuzzy_term, pagination_params)
        if q_vec is not None:
            return SemanticRetriever(q_vec, pagination_params)
        if fuzzy_term is not None:
            return FuzzyRetriever(fuzzy_term, pagination_params)

        return StructuredRetriever(pagination_params)

    @classmethod
    async def _get_query_vector(
        cls, vector_query: str | None, q_vec_override: list[float] | None
    ) -> list[float] | None:
        """Get the query vector either from the override or by embedding the text.

        Returns ``None`` when there is neither an override nor a query text, or
        when the embedder returns an empty result.
        """
        if q_vec_override:
            return q_vec_override

        if not vector_query:
            return None

        # Imported at call time rather than at module level.
        # NOTE(review): presumably to avoid an import cycle or a heavy import -- confirm.
        from orchestrator.search.core.embedding import QueryEmbedder

        q_vec = await QueryEmbedder.generate_for_text_async(vector_query)
        if not q_vec:
            logger.warning("Embedding generation failed; using non-semantic retriever")
            return None

        return q_vec

    @abstractmethod
    def apply(self, candidate_query: Select) -> Select:
        """Apply the ranking logic to the given candidate query.

        Parameters
        ----------
        candidate_query : Select
            A SQLAlchemy `Select` statement returning candidate entity IDs.

        Returns
        -------
        Select
            A new `Select` statement with ranking expressions applied.
        """
        ...

    def _quantize_score_for_pagination(self, score_value: float) -> BindParameter[Decimal]:
        """Convert a cursor score into a Decimal literal quantized to ``SCORE_PRECISION`` places.

        The float goes through ``str`` first so the Decimal is built from the
        float's printed representation rather than its binary expansion.
        """
        # 10 ** -SCORE_PRECISION; only its exponent matters to ``quantize``.
        quantum = Decimal(1).scaleb(-self.SCORE_PRECISION)
        quantized = Decimal(str(score_value)).quantize(quantum)
        return literal(quantized, type_=self.SCORE_NUMERIC_TYPE)

    @property
    @abstractmethod
    def metadata(self) -> SearchMetadata:
        """Return metadata describing this search strategy."""
        ...
115
-
116
-
117
class StructuredRetriever(Retriever):
    """Retriever for purely structured searches: every candidate gets the constant score 1.0."""

    def __init__(self, pagination_params: PaginationParams) -> None:
        self.page_after_id = pagination_params.page_after_id

    def apply(self, candidate_query: Select) -> Select:
        """Return the candidates ordered by entity id, resuming after ``page_after_id`` when set."""
        candidates = candidate_query.subquery()
        entity_id = candidates.c.entity_id
        ranked = select(entity_id, literal(1.0).label("score")).select_from(candidates)

        # No cursor: start from the first entity id.
        if not self.page_after_id:
            return ranked.order_by(entity_id.asc())

        # Keyset pagination on the entity id alone, since all scores are equal.
        return ranked.where(entity_id > self.page_after_id).order_by(entity_id.asc())

    @property
    def metadata(self) -> SearchMetadata:
        """Metadata describing the structured search strategy."""
        return SearchMetadata.structured()
135
-
136
-
137
class FuzzyRetriever(Retriever):
    """Ranks results based on the max of fuzzy text similarity scores."""

    def __init__(self, fuzzy_term: str, pagination_params: PaginationParams) -> None:
        self.fuzzy_term = fuzzy_term
        self.page_after_score = pagination_params.page_after_score
        self.page_after_id = pagination_params.page_after_id

    def apply(self, candidate_query: Select) -> Select:
        """Rank candidates by their best ``word_similarity`` against the fuzzy term.

        Parameters
        ----------
        candidate_query : Select
            Statement returning candidate entity IDs (an ``entity_id`` column is read).

        Returns
        -------
        Select
            Statement yielding entity id, score, highlight text and highlight
            path, ordered by score descending and entity id ascending.
        """
        cand = candidate_query.subquery()

        similarity_expr = func.word_similarity(self.fuzzy_term, AiSearchIndex.value)

        # Entity score = highest similarity over all of the entity's index rows,
        # rounded to the shared score precision.
        raw_max = func.max(similarity_expr).over(partition_by=AiSearchIndex.entity_id)
        score = cast(
            func.round(cast(raw_max, self.SCORE_NUMERIC_TYPE), self.SCORE_PRECISION),
            self.SCORE_NUMERIC_TYPE,
        ).label(self.SCORE_LABEL)

        # Highlight = value/path of the best-matching row; ties are broken by path.
        best_row_first = [similarity_expr.desc(), AiSearchIndex.path.asc()]

        combined_query = (
            select(
                AiSearchIndex.entity_id,
                score,
                func.first_value(AiSearchIndex.value)
                .over(partition_by=AiSearchIndex.entity_id, order_by=best_row_first)
                .label(self.HIGHLIGHT_TEXT_LABEL),
                func.first_value(AiSearchIndex.path)
                .over(partition_by=AiSearchIndex.entity_id, order_by=best_row_first)
                .label(self.HIGHLIGHT_PATH_LABEL),
            )
            .select_from(AiSearchIndex)
            .join(cand, cand.c.entity_id == AiSearchIndex.entity_id)
            .where(
                and_(
                    AiSearchIndex.value_type.in_(self.SEARCHABLE_FIELD_TYPES),
                    # NOTE(review): "<%" is presumably the pg_trgm word-similarity operator -- confirm.
                    literal(self.fuzzy_term).op("<%")(AiSearchIndex.value),
                )
            )
            # One row per entity; the window functions already carry the per-entity values.
            .distinct(AiSearchIndex.entity_id)
        )
        final_query = combined_query.subquery("ranked_fuzzy")

        stmt = select(
            final_query.c.entity_id,
            final_query.c.score,
            final_query.c.highlight_text,
            final_query.c.highlight_path,
        ).select_from(final_query)

        stmt = self._apply_score_pagination(stmt, final_query.c.score, final_query.c.entity_id)

        return stmt.order_by(final_query.c.score.desc().nulls_last(), final_query.c.entity_id.asc())

    @property
    def metadata(self) -> SearchMetadata:
        """Metadata describing the fuzzy search strategy."""
        return SearchMetadata.fuzzy()

    def _apply_score_pagination(
        self, stmt: Select, score_column: ColumnElement, entity_id_column: ColumnElement
    ) -> Select:
        """Apply keyset pagination on (score descending, entity id ascending).

        The cursor score is quantized through the shared base-class helper so it
        is compared as a NUMERIC of the same scale as the rounded score column,
        as the semantic and hybrid retrievers do.
        """
        if self.page_after_score is None or self.page_after_id is None:
            return stmt

        score_param = self._quantize_score_for_pagination(self.page_after_score)
        return stmt.where(
            or_(
                score_column < score_param,
                and_(score_column == score_param, entity_id_column > self.page_after_id),
            )
        )
208
-
209
-
210
class SemanticRetriever(Retriever):
    """Ranks results based on the minimum semantic vector distance."""

    def __init__(self, vector_query: list[float], pagination_params: PaginationParams) -> None:
        self.vector_query = vector_query
        self.page_after_score = pagination_params.page_after_score
        self.page_after_id = pagination_params.page_after_id

    def apply(self, candidate_query: Select) -> Select:
        """Rank candidates by the smallest embedding distance to the query vector.

        Parameters
        ----------
        candidate_query : Select
            Statement returning candidate entity IDs (an ``entity_id`` column is read).

        Returns
        -------
        Select
            Statement yielding entity id, score, highlight text and highlight
            path, ordered by score descending and entity id ascending.
        """
        cand = candidate_query.subquery()

        dist = AiSearchIndex.embedding.l2_distance(self.vector_query)

        raw_min = func.min(dist).over(partition_by=AiSearchIndex.entity_id)

        # Map distance to a score so ordering agrees with the other retrievers:
        # score = 1 / (1 + distance), i.e. smaller distance = higher score.
        one = literal(1.0, type_=self.SCORE_NUMERIC_TYPE)
        similarity = one / (
            literal(1.0, type_=self.SCORE_NUMERIC_TYPE) + cast(raw_min, self.SCORE_NUMERIC_TYPE)
        )

        score = cast(
            func.round(cast(similarity, self.SCORE_NUMERIC_TYPE), self.SCORE_PRECISION),
            self.SCORE_NUMERIC_TYPE,
        ).label(self.SCORE_LABEL)

        # Highlight = value/path of the closest row; ties are broken by path.
        closest_row_first = [dist.asc(), AiSearchIndex.path.asc()]

        combined_query = (
            select(
                AiSearchIndex.entity_id,
                score,
                func.first_value(AiSearchIndex.value)
                .over(partition_by=AiSearchIndex.entity_id, order_by=closest_row_first)
                .label(self.HIGHLIGHT_TEXT_LABEL),
                func.first_value(AiSearchIndex.path)
                .over(partition_by=AiSearchIndex.entity_id, order_by=closest_row_first)
                .label(self.HIGHLIGHT_PATH_LABEL),
            )
            .select_from(AiSearchIndex)
            .join(cand, cand.c.entity_id == AiSearchIndex.entity_id)
            # Rows without an embedding cannot be ranked semantically.
            # ``is_not`` is the current name of the legacy ``isnot`` alias.
            .where(AiSearchIndex.embedding.is_not(None))
            # One row per entity; the window functions already carry the per-entity values.
            .distinct(AiSearchIndex.entity_id)
        )
        final_query = combined_query.subquery("ranked_semantic")

        stmt = select(
            final_query.c.entity_id,
            final_query.c.score,
            final_query.c.highlight_text,
            final_query.c.highlight_path,
        ).select_from(final_query)

        stmt = self._apply_semantic_pagination(stmt, final_query.c.score, final_query.c.entity_id)

        return stmt.order_by(final_query.c.score.desc().nulls_last(), final_query.c.entity_id.asc())

    @property
    def metadata(self) -> SearchMetadata:
        """Metadata describing the semantic search strategy."""
        return SearchMetadata.semantic()

    def _apply_semantic_pagination(
        self, stmt: Select, score_column: ColumnElement, entity_id_column: ColumnElement
    ) -> Select:
        """Apply keyset pagination on (score descending, entity id ascending).

        The cursor score is quantized to a Decimal of the score column's scale
        before it is compared.
        """
        if self.page_after_score is None or self.page_after_id is None:
            return stmt

        score_param = self._quantize_score_for_pagination(self.page_after_score)
        return stmt.where(
            or_(
                score_column < score_param,
                and_(score_column == score_param, entity_id_column > self.page_after_id),
            )
        )
281
-
282
-
283
class RrfHybridRetriever(Retriever):
    """Fuses a semantic ranking and a fuzzy ranking with Reciprocal Rank Fusion (RRF).

    Field-level matches are collected first, aggregated per entity, ranked
    twice (by average semantic distance and by average fuzzy score) and the two
    ranks are combined into one normalized score.
    """

    def __init__(
        self,
        q_vec: list[float],
        fuzzy_term: str,
        pagination_params: PaginationParams,
        k: int = 60,
        field_candidates_limit: int = 100,
    ) -> None:
        self.q_vec = q_vec
        self.fuzzy_term = fuzzy_term
        self.page_after_score = pagination_params.page_after_score
        self.page_after_id = pagination_params.page_after_id
        # RRF smoothing constant added to each rank.
        self.k = k
        # Maximum number of field-level rows considered before aggregation.
        self.field_candidates_limit = field_candidates_limit

    def apply(self, candidate_query: Select) -> Select:
        """Build the fused ranking statement for the given candidate query.

        Parameters
        ----------
        candidate_query : Select
            Statement returning candidate entity IDs (an ``entity_id`` column is read).

        Returns
        -------
        Select
            Statement yielding entity id, score, highlight text, highlight path
            and the ``perfect_match`` flag, ordered by score descending and
            entity id ascending.
        """
        candidates = candidate_query.subquery()
        index = AiSearchIndex
        numeric = self.SCORE_NUMERIC_TYPE
        vector_param: BindParameter[list[float]] = bindparam("q_vec", self.q_vec, type_=index.embedding.type)

        # --- field-level signals -------------------------------------------
        fuzzy_similarity = func.word_similarity(self.fuzzy_term, index.value)
        # Distance to the query vector, or NULL for rows without an embedding.
        distance_or_null = case(
            (index.embedding.is_(None), None),
            else_=index.embedding.op("<->")(vector_param),
        )
        # Rows without an embedding are given distance 1.0 for the averages below.
        semantic_distance = func.coalesce(distance_or_null, literal(1.0)).label("semantic_distance")

        # NOTE(review): "<%" is presumably the pg_trgm word-similarity operator -- confirm.
        trigram_match = literal(self.fuzzy_term).op("<%")(index.value)

        field_rows = (
            select(
                index.entity_id,
                index.path,
                index.value,
                semantic_distance,
                fuzzy_similarity.label("fuzzy_score"),
            )
            .select_from(index)
            .join(candidates, candidates.c.entity_id == index.entity_id)
            .where(
                and_(
                    index.value_type.in_(self.SEARCHABLE_FIELD_TYPES),
                    trigram_match,
                )
            )
            .order_by(
                fuzzy_similarity.desc().nulls_last(),
                distance_or_null.asc().nulls_last(),
                index.entity_id.asc(),
            )
            .limit(self.field_candidates_limit)
        ).cte("field_candidates")

        # --- per-entity aggregation ----------------------------------------
        per_entity = (
            select(
                field_rows.c.entity_id,
                func.avg(field_rows.c.semantic_distance).label("avg_semantic_distance"),
                func.avg(field_rows.c.fuzzy_score).label("avg_fuzzy_score"),
            ).group_by(field_rows.c.entity_id)
        ).cte("entity_scores")

        def best_field(column: ColumnElement) -> ColumnElement:
            """Value of ``column`` on the entity's best fuzzy row (ties broken by path)."""
            return func.first_value(column).over(
                partition_by=field_rows.c.entity_id,
                order_by=[field_rows.c.fuzzy_score.desc(), field_rows.c.path.asc()],
            )

        highlights = (
            select(
                field_rows.c.entity_id,
                best_field(field_rows.c.value).label(self.HIGHLIGHT_TEXT_LABEL),
                best_field(field_rows.c.path).label(self.HIGHLIGHT_PATH_LABEL),
            ).distinct(field_rows.c.entity_id)
        ).cte("entity_highlights")

        # --- ranking per signal --------------------------------------------
        semantic_rank = (
            func.dense_rank()
            .over(order_by=[per_entity.c.avg_semantic_distance.asc().nulls_last(), per_entity.c.entity_id.asc()])
            .label("sem_rank")
        )
        fuzzy_rank = (
            func.dense_rank()
            .over(order_by=[per_entity.c.avg_fuzzy_score.desc().nulls_last(), per_entity.c.entity_id.asc()])
            .label("fuzzy_rank")
        )

        ranked = (
            select(
                per_entity.c.entity_id,
                per_entity.c.avg_semantic_distance,
                per_entity.c.avg_fuzzy_score,
                highlights.c.highlight_text,
                highlights.c.highlight_path,
                semantic_rank,
                fuzzy_rank,
            ).select_from(per_entity.join(highlights, per_entity.c.entity_id == highlights.c.entity_id))
        ).cte("ranked_results")

        # --- fusion ----------------------------------------------------------
        # Plain RRF: sum of reciprocal (k + rank) over both rankings.
        rrf_sum = (1.0 / (self.k + ranked.c.sem_rank)) + (1.0 / (self.k + ranked.c.fuzzy_rank))
        rrf_numeric = cast(rrf_sum, numeric)

        # Flag entities whose average fuzzy score is at least 0.9; they get a boost below.
        perfect = case((ranked.c.avg_fuzzy_score >= 0.9, 1), else_=0).label("perfect_match")

        # Largest possible RRF value is n_sources / (k + 1): rank 1 in both rankings.
        k_literal = literal(float(self.k), type_=numeric)
        source_count = literal(2.0, type_=numeric)  # semantic + fuzzy
        rrf_ceiling = source_count / (k_literal + literal(1.0, type_=numeric))

        # The boost is the ceiling plus a 5% margin, so any flagged entity
        # scores strictly above every unflagged one.
        headroom = rrf_ceiling * literal(0.05, type_=numeric)
        boost = rrf_ceiling + headroom

        boosted = rrf_numeric + boost * cast(perfect, numeric)

        # Divide by the theoretical maximum (boost + ceiling) to land in [0, 1].
        score_ceiling = boost + rrf_ceiling
        normalized = boosted / score_ceiling

        score = cast(
            func.round(cast(normalized, numeric), self.SCORE_PRECISION),
            numeric,
        ).label(self.SCORE_LABEL)

        fused = select(
            ranked.c.entity_id,
            score,
            ranked.c.highlight_text,
            ranked.c.highlight_path,
            perfect.label("perfect_match"),
        ).select_from(ranked)

        fused = self._apply_fused_pagination(fused, score, ranked.c.entity_id)

        ordered = fused.order_by(
            score.desc().nulls_last(),
            ranked.c.entity_id.asc(),
        )
        return ordered.params(q_vec=self.q_vec)

    def _apply_fused_pagination(
        self,
        stmt: Select,
        score_column: ColumnElement,
        entity_id_column: ColumnElement,
    ) -> Select:
        """Keyset-paginate on (fused score descending, entity id ascending)."""
        cursor_incomplete = self.page_after_score is None or self.page_after_id is None
        if cursor_incomplete:
            return stmt

        score_param = self._quantize_score_for_pagination(self.page_after_score)
        same_score_later_id = and_(score_column == score_param, entity_id_column > self.page_after_id)
        return stmt.where(or_(score_column < score_param, same_score_later_id))

    @property
    def metadata(self) -> SearchMetadata:
        """Metadata describing the hybrid search strategy."""
        return SearchMetadata.hybrid()