aait 2.3.15.996__tar.gz → 2.3.15.998__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (283) hide show
  1. aait-2.3.15.998/PKG-INFO +32 -0
  2. aait-2.3.15.998/aait.egg-info/PKG-INFO +32 -0
  3. {aait-2.3.15.996 → aait-2.3.15.998}/aait.egg-info/SOURCES.txt +4 -0
  4. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/__init__.py +2 -2
  5. aait-2.3.15.998/orangecontrib/AAIT/llm/chunking.py +113 -0
  6. aait-2.3.15.998/orangecontrib/AAIT/llm/wordchunker_deprecated.py +333 -0
  7. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/MetManagement.py +42 -0
  8. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/base_widget.py +10 -2
  9. aait-2.3.15.998/orangecontrib/AAIT/utils/tools/TigerODM_notepad.py +1 -0
  10. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/widget_positioning.py +117 -117
  11. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWChunking.py +10 -1
  12. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWEmptySwitch.py +2 -2
  13. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWLMStudio.py +200 -200
  14. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWLargeLanguageModel.py +10 -6
  15. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWLoadDocuments.py +1 -1
  16. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Falcon.py +7 -4
  17. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Mistral.py +7 -4
  18. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Qwen.py +7 -4
  19. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Qwen1B5_Q6.py +7 -4
  20. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Qwen2_5_32B.py +7 -4
  21. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Qwen3B_Q4.py +7 -4
  22. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q4.py +7 -4
  23. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q6.py +7 -4
  24. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Solar.py +7 -4
  25. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_SolarUncensored.py +7 -4
  26. aait-2.3.15.998/orangecontrib/AAIT/widgets/OWSplitPath.py +135 -0
  27. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWTrigger.py +1 -1
  28. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owchunking.ui +8 -3
  29. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owloaddocuments.ui +1 -1
  30. aait-2.3.15.998/orangecontrib/AAIT/widgets/designer/owsplitpath.ui +131 -0
  31. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/CreateInstance.svg +107 -107
  32. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/Table.svg +85 -85
  33. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/quadrantclicker.svg +135 -135
  34. aait-2.3.15.998/orangecontrib/AAIT/widgets/icons/split_path.png +0 -0
  35. {aait-2.3.15.996 → aait-2.3.15.998}/setup.py +1 -1
  36. aait-2.3.15.996/PKG-INFO +0 -8
  37. aait-2.3.15.996/aait.egg-info/PKG-INFO +0 -8
  38. aait-2.3.15.996/orangecontrib/AAIT/llm/chunking.py +0 -271
  39. aait-2.3.15.996/orangecontrib/AAIT/utils/tools/TigerODM_notepad.py +0 -1
  40. {aait-2.3.15.996 → aait-2.3.15.998}/aait.egg-info/dependency_links.txt +0 -0
  41. {aait-2.3.15.996 → aait-2.3.15.998}/aait.egg-info/entry_points.txt +0 -0
  42. {aait-2.3.15.996 → aait-2.3.15.998}/aait.egg-info/namespace_packages.txt +0 -0
  43. {aait-2.3.15.996 → aait-2.3.15.998}/aait.egg-info/requires.txt +0 -0
  44. {aait-2.3.15.996 → aait-2.3.15.998}/aait.egg-info/top_level.txt +0 -0
  45. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/__init__.py +0 -0
  46. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/answers.py +0 -0
  47. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/answers_llama.py +0 -0
  48. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/embeddings.py +0 -0
  49. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/lemmes.py +0 -0
  50. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/lmstudio.py +0 -0
  51. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/process_documents.py +0 -0
  52. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/prompt_management.py +0 -0
  53. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/resources/__ini__.py +0 -0
  54. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/resources/markdown_recipe.json +0 -0
  55. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/llm/translations.py +0 -0
  56. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/optimiser/__init__.py +0 -0
  57. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/optimiser/optuna_multi.py +0 -0
  58. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/OperationSystem.py +0 -0
  59. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/SimpleDialogQt.py +0 -0
  60. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/__init__.py +0 -0
  61. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/aait_repo_file.py +0 -0
  62. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/aait_table_viewer.py +0 -0
  63. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/delta_local_shared_fodler.py +0 -0
  64. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/import_uic.py +0 -0
  65. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/initialize_from_ini.py +0 -0
  66. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/mac_utils.py +0 -0
  67. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/shared_functions.py +0 -0
  68. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/shared_variables.py +0 -0
  69. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/subprocess_management.py +0 -0
  70. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/thread_management.py +0 -0
  71. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/tools/__init__.py +0 -0
  72. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/tools/change_owcorpus.py +0 -0
  73. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py +0 -0
  74. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/tools/first_time_check.py +0 -0
  75. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/tools/owcorpus_ok.txt +0 -0
  76. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/unlink_table_domain.py +0 -0
  77. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/utils/windows_utils.py +0 -0
  78. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWAAITResourcesManager.py +0 -0
  79. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWAccumulator.py +0 -0
  80. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWAddColumns.py +0 -0
  81. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWApplyRules.py +0 -0
  82. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWAutoShowCreateInstance.py +0 -0
  83. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWAutoShowTable.py +0 -0
  84. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWCN2rule_view.py +0 -0
  85. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWConcatRules.py +0 -0
  86. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWConverseLLM.py +0 -0
  87. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWCreateEmbeddings.py +0 -0
  88. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWDisplayMD.py +0 -0
  89. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWEditTable.py +0 -0
  90. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWEndLoop.py +0 -0
  91. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWExecuteScript.py +0 -0
  92. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWExtraChunks.py +0 -0
  93. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWExtractTokens.py +0 -0
  94. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWFileMetadata.py +0 -0
  95. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWFileSyncChecker.py +0 -0
  96. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWFileWithPath.py +0 -0
  97. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWFindFilesFromDir.py +0 -0
  98. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWFusionNM.py +0 -0
  99. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWGenerateQuestions.py +0 -0
  100. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWGenerateSynthesis.py +0 -0
  101. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWGenerateWord.py +0 -0
  102. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWGetPages.py +0 -0
  103. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWInputSelector.py +0 -0
  104. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWKeywords.py +0 -0
  105. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWKeywordsDetection.py +0 -0
  106. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWLLMEngine.py +0 -0
  107. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWLanguageDetection.py +0 -0
  108. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWLemmatizer.py +0 -0
  109. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_CE_MiniLML6.py +0 -0
  110. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_Embeddings.py +0 -0
  111. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_HelsinkiEnFr.py +0 -0
  112. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_HelsinkiFrEn.py +0 -0
  113. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_MPNET.py +0 -0
  114. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_EN.py +0 -0
  115. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_FR.py +0 -0
  116. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWOperationSystem.py +0 -0
  117. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWOptimisation.py +0 -0
  118. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWOptimisationSelection.py +0 -0
  119. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWOptimisationSendScore.py +0 -0
  120. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWProcessDocumentsFromPath.py +0 -0
  121. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWQuadrantclicker.py +0 -0
  122. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWQueryLLM.py +0 -0
  123. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWRandomData.py +0 -0
  124. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWReranking.py +0 -0
  125. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWSaveFilepathEntry.py +0 -0
  126. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWSelectColumnDynamique.py +0 -0
  127. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py +0 -0
  128. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWSortAndSelect.py +0 -0
  129. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWSplitExcelSheets.py +0 -0
  130. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWStartLoop.py +0 -0
  131. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWTable2Corpus.py +0 -0
  132. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWTranslation.py +0 -0
  133. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/OWUnzipFolder.py +0 -0
  134. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/POW_Wfactory.py +0 -0
  135. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/__init__.py +0 -0
  136. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owMarkdown.ui +0 -0
  137. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/ow_OperationSystem.ui +0 -0
  138. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +0 -0
  139. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/ow_widget_random_data.ui +0 -0
  140. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owaccumulator.ui +0 -0
  141. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owaddcolumns.ui +0 -0
  142. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owapplyrules.ui +0 -0
  143. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owconcatrules.ui +0 -0
  144. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owconversellm.ui +0 -0
  145. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owedgellm.ui +0 -0
  146. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owembeddings.ui +0 -0
  147. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owemptyswitch.ui +0 -0
  148. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owendloop.ui +0 -0
  149. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owexecutescript.ui +0 -0
  150. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owexecutescript_TEST.ui +0 -0
  151. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owextrachunks.ui +0 -0
  152. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owextracttokens.ui +0 -0
  153. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owfilemetadata.ui +0 -0
  154. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui +0 -0
  155. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owfilewithpath.ui +0 -0
  156. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui +0 -0
  157. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owfusion_nm.ui +0 -0
  158. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owgenerate_word.ui +0 -0
  159. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui +0 -0
  160. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui +0 -0
  161. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owgetpages.ui +0 -0
  162. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owkeyword.ui +0 -0
  163. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owkeywordsdetection.ui +0 -0
  164. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owlangdetect.ui +0 -0
  165. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owlargelanguagemodel.ui +0 -0
  166. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owlemmatizer.ui +0 -0
  167. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owllm4all.ui +0 -0
  168. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owloadworkflow.ui +0 -0
  169. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui +0 -0
  170. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_embeddings.ui +0 -0
  171. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui +0 -0
  172. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_en_fr.ui +0 -0
  173. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_fr_en.ui +0 -0
  174. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_mistral.ui +0 -0
  175. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_mpnet.ui +0 -0
  176. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_qwen.ui +0 -0
  177. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_2.5_32b.ui +0 -0
  178. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_1.5b_q6.ui +0 -0
  179. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_3b_q4.ui +0 -0
  180. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q4.ui +0 -0
  181. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q6.ui +0 -0
  182. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_qwencoder_7b.ui +0 -0
  183. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_solar.ui +0 -0
  184. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_solar_uncensored.ui +0 -0
  185. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_en.ui +0 -0
  186. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_fr.ui +0 -0
  187. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/ownumberpointinrules.ui +0 -0
  188. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owprocessdocuments.ui +0 -0
  189. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owquadrant_clicker.ui +0 -0
  190. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owqueryllm.ui +0 -0
  191. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owreranking.ui +0 -0
  192. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owsavewithpath.ui +0 -0
  193. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owselect_column_dynamic.ui +0 -0
  194. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui +0 -0
  195. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owsortandselect.ui +0 -0
  196. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owsplitexcelsheets.ui +0 -0
  197. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owstartloop.ui +0 -0
  198. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owtable2corpus.ui +0 -0
  199. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owtranslation.ui +0 -0
  200. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owtrigger.ui +0 -0
  201. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/designer/owunzipfolder.ui +0 -0
  202. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/CN2RuleViewer.svg +0 -0
  203. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/MDViewer.png +0 -0
  204. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/Mistral.png +0 -0
  205. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/apply_rules.svg +0 -0
  206. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/blue_down_arrow.svg +0 -0
  207. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/book.png +0 -0
  208. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/category.svg +0 -0
  209. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/dark_green.txt +0 -0
  210. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/de.png +0 -0
  211. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/document_generator.png +0 -0
  212. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/documents.png +0 -0
  213. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/edge_llm.svg +0 -0
  214. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/endloop.png +0 -0
  215. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/extra_chunks.png +0 -0
  216. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/green_check.svg +0 -0
  217. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/in_or_out.png +0 -0
  218. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/input.png +0 -0
  219. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/languages.png +0 -0
  220. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/lm_studio.png +0 -0
  221. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg +0 -0
  222. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg +0 -0
  223. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/local_interf_pull.svg +0 -0
  224. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/local_interf_push.svg +0 -0
  225. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/local_interf_text_pull.svg +0 -0
  226. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/logo_solar.svg +0 -0
  227. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/logo_uncensoredsolar.svg +0 -0
  228. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/logo_upload.png +0 -0
  229. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/models.png +0 -0
  230. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/operationSystem.png +0 -0
  231. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/optimisation.png +0 -0
  232. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/optimizer.png +0 -0
  233. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/output.png +0 -0
  234. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owCN2_intersect_rules.svg +0 -0
  235. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owaccumulator.png +0 -0
  236. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owchunking.png +0 -0
  237. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owconversellm.svg +0 -0
  238. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owedittable.svg +0 -0
  239. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owembeddings.svg +0 -0
  240. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owemptyswitch.svg +0 -0
  241. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owenvinfo.png +0 -0
  242. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owexecutescript.svg +0 -0
  243. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owextracttokens.svg +0 -0
  244. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owfilemetadata.svg +0 -0
  245. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg +0 -0
  246. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg +0 -0
  247. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owfilewithpath.svg +0 -0
  248. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owfusion_nm.png +0 -0
  249. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg +0 -0
  250. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png +0 -0
  251. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owkeywords.png +0 -0
  252. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owkeywordsdetection.png +0 -0
  253. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owlargelanguagemodel.svg +0 -0
  254. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owlemmatizer.svg +0 -0
  255. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owloaddocuments.svg +0 -0
  256. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owmodel_ce_minilml6.svg +0 -0
  257. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owmodel_embeddings.svg +0 -0
  258. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owmodel_falcon.png +0 -0
  259. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_en_fr.svg +0 -0
  260. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_fr_en.svg +0 -0
  261. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owmodel_mpnet.svg +0 -0
  262. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_en.svg +0 -0
  263. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_fr.svg +0 -0
  264. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owoptimisation.svg +0 -0
  265. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owoptimisationselection.png +0 -0
  266. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owqueryllm.svg +0 -0
  267. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owreranking.svg +0 -0
  268. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owsavefilepathentry.svg +0 -0
  269. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owselectcolumndynamique.png +0 -0
  270. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owsortandselect.svg +0 -0
  271. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owtable2corpus.svg +0 -0
  272. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owtranslation.svg +0 -0
  273. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/owtrigger.svg +0 -0
  274. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/processdocuments.svg +0 -0
  275. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/qwen-color.png +0 -0
  276. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/select_dynamic_row.png +0 -0
  277. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/splitexcelsheets.png +0 -0
  278. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/startloop.png +0 -0
  279. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/tools.png +0 -0
  280. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/widgetFactory.svg +0 -0
  281. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/AAIT/widgets/icons/zip.svg +0 -0
  282. {aait-2.3.15.996 → aait-2.3.15.998}/orangecontrib/__init__.py +0 -0
  283. {aait-2.3.15.996 → aait-2.3.15.998}/setup.cfg +0 -0
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: aait
3
+ Version: 2.3.15.998
4
+ Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
5
+ Home-page:
6
+ Author: Orange community
7
+ Author-email:
8
+ Keywords: orange3 add-on
9
+ Requires-Dist: torch
10
+ Requires-Dist: sentence-transformers
11
+ Requires-Dist: gpt4all[all]
12
+ Requires-Dist: sacremoses
13
+ Requires-Dist: transformers
14
+ Requires-Dist: sentencepiece
15
+ Requires-Dist: optuna
16
+ Requires-Dist: spacy
17
+ Requires-Dist: markdown
18
+ Requires-Dist: python-multipart
19
+ Requires-Dist: PyMuPDF
20
+ Requires-Dist: chonkie
21
+ Requires-Dist: GPUtil
22
+ Requires-Dist: unidecode
23
+ Requires-Dist: python-docx
24
+ Requires-Dist: psutil
25
+ Requires-Dist: thefuzz
26
+ Requires-Dist: beautifulsoup4
27
+ Requires-Dist: rank_bm25
28
+ Requires-Dist: CATEGORIT
29
+ Dynamic: author
30
+ Dynamic: keywords
31
+ Dynamic: requires-dist
32
+ Dynamic: summary
@@ -0,0 +1,32 @@
1
+ Metadata-Version: 2.4
2
+ Name: aait
3
+ Version: 2.3.15.998
4
+ Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
5
+ Home-page:
6
+ Author: Orange community
7
+ Author-email:
8
+ Keywords: orange3 add-on
9
+ Requires-Dist: torch
10
+ Requires-Dist: sentence-transformers
11
+ Requires-Dist: gpt4all[all]
12
+ Requires-Dist: sacremoses
13
+ Requires-Dist: transformers
14
+ Requires-Dist: sentencepiece
15
+ Requires-Dist: optuna
16
+ Requires-Dist: spacy
17
+ Requires-Dist: markdown
18
+ Requires-Dist: python-multipart
19
+ Requires-Dist: PyMuPDF
20
+ Requires-Dist: chonkie
21
+ Requires-Dist: GPUtil
22
+ Requires-Dist: unidecode
23
+ Requires-Dist: python-docx
24
+ Requires-Dist: psutil
25
+ Requires-Dist: thefuzz
26
+ Requires-Dist: beautifulsoup4
27
+ Requires-Dist: rank_bm25
28
+ Requires-Dist: CATEGORIT
29
+ Dynamic: author
30
+ Dynamic: keywords
31
+ Dynamic: requires-dist
32
+ Dynamic: summary
@@ -18,6 +18,7 @@ orangecontrib/AAIT/llm/lmstudio.py
18
18
  orangecontrib/AAIT/llm/process_documents.py
19
19
  orangecontrib/AAIT/llm/prompt_management.py
20
20
  orangecontrib/AAIT/llm/translations.py
21
+ orangecontrib/AAIT/llm/wordchunker_deprecated.py
21
22
  orangecontrib/AAIT/llm/resources/__ini__.py
22
23
  orangecontrib/AAIT/llm/resources/markdown_recipe.json
23
24
  orangecontrib/AAIT/optimiser/__init__.py
@@ -113,6 +114,7 @@ orangecontrib/AAIT/widgets/OWSelectColumnDynamique.py
113
114
  orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py
114
115
  orangecontrib/AAIT/widgets/OWSortAndSelect.py
115
116
  orangecontrib/AAIT/widgets/OWSplitExcelSheets.py
117
+ orangecontrib/AAIT/widgets/OWSplitPath.py
116
118
  orangecontrib/AAIT/widgets/OWStartLoop.py
117
119
  orangecontrib/AAIT/widgets/OWTable2Corpus.py
118
120
  orangecontrib/AAIT/widgets/OWTranslation.py
@@ -183,6 +185,7 @@ orangecontrib/AAIT/widgets/designer/owselect_column_dynamic.ui
183
185
  orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui
184
186
  orangecontrib/AAIT/widgets/designer/owsortandselect.ui
185
187
  orangecontrib/AAIT/widgets/designer/owsplitexcelsheets.ui
188
+ orangecontrib/AAIT/widgets/designer/owsplitpath.ui
186
189
  orangecontrib/AAIT/widgets/designer/owstartloop.ui
187
190
  orangecontrib/AAIT/widgets/designer/owtable2corpus.ui
188
191
  orangecontrib/AAIT/widgets/designer/owtranslation.ui
@@ -266,6 +269,7 @@ orangecontrib/AAIT/widgets/icons/processdocuments.svg
266
269
  orangecontrib/AAIT/widgets/icons/quadrantclicker.svg
267
270
  orangecontrib/AAIT/widgets/icons/qwen-color.png
268
271
  orangecontrib/AAIT/widgets/icons/select_dynamic_row.png
272
+ orangecontrib/AAIT/widgets/icons/split_path.png
269
273
  orangecontrib/AAIT/widgets/icons/splitexcelsheets.png
270
274
  orangecontrib/AAIT/widgets/icons/startloop.png
271
275
  orangecontrib/AAIT/widgets/icons/tools.png
@@ -42,11 +42,11 @@ if not check_executable_length():
42
42
  exit(0)
43
43
 
44
44
 
45
- target_version = version.parse("3.37")
45
+ target_version = version.parse("3.40")
46
46
  current_version = version.parse(Orange.version.version)
47
47
  if current_version < target_version: # Skip the file
48
48
  print("Orange version not compatible with all of AAIT functions !")
49
-
49
+ SimpleDialogQt.BoxError("Orange version not compatible with all of AAIT functions !")
50
50
 
51
51
  else: # Execute the file
52
52
  import os
@@ -0,0 +1,113 @@
1
+ import copy
2
+ import re
3
+ import os
4
+ import pathlib
5
+ import Orange
6
+ from Orange.data import Domain, Table, StringVariable, ContinuousVariable
7
+ from sentence_transformers import SentenceTransformer
8
+
9
+ ### Chonkie
10
+ from chonkie import TokenChunker, SentenceChunker, RecursiveChunker, SemanticChunker, LateChunker, CodeChunker
11
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
12
+ from Orange.widgets.orangecontrib.AAIT.llm import wordchunker_deprecated
13
+ from Orange.widgets.orangecontrib.AAIT.utils.MetManagement import get_local_store_path
14
+ else:
15
+ from orangecontrib.AAIT.llm import wordchunker_deprecated
16
+ from orangecontrib.AAIT.utils.MetManagement import get_local_store_path
17
+
18
+
19
def _build_chunker(mode, tokenizer, chunk_size, chunk_overlap):
    """
    Instantiate the chunker that corresponds to `mode`.

    Parameters:
        mode (str): One of "tokens", "words", "sentence", "markdown",
            "Recursive", "Semantic", "Late" or "Code" (case-sensitive).
        tokenizer: Tokenizer (or embedding model for "Semantic"/"Late") handed to the chunker.
        chunk_size (int): Target chunk size.
        chunk_overlap (int): Overlap between chunks (only used by "tokens", "words" and "sentence").

    Returns:
        A callable chunker: `chunker(text)` yields the chunks of `text`.

    Raises:
        ValueError: If `mode` is not one of the supported values.
    """
    if mode == "tokens":
        return TokenChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    if mode == "words":
        # The word chunker does not use `tokenizer`: it measures sizes with a
        # SentenceTransformer model loaded from the local store.
        path_ugly = os.path.join(get_local_store_path(), "Models", "NLP", "all-mpnet-base-v2")
        tokenizer_word = SentenceTransformer(path_ugly, device="cpu")
        return wordchunker_deprecated.WordChunker(tokenizer=tokenizer_word, chunk_size=chunk_size,
                                                  chunk_overlap=chunk_overlap)
    if mode == "sentence":
        return SentenceChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap,
                               min_sentences_per_chunk=1)
    if mode == "markdown":
        markdown_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources/markdown_recipe.json")
        return RecursiveChunker.from_recipe(path=markdown_path,
                                            tokenizer=tokenizer,
                                            chunk_size=chunk_size,
                                            min_characters_per_chunk=1)

    # TODO: expose the parameters of the modes below in the .ui file.
    # "Recursive" needs "rules" to chunk differently from Token or Sentence.
    if mode == "Recursive":
        return RecursiveChunker(tokenizer=tokenizer, chunk_size=chunk_size, min_characters_per_chunk=24)
    # To be tested before being added to the UI. An embedding model is REQUIRED!
    if mode == "Semantic":
        return SemanticChunker(embedding_model=tokenizer, threshold=0.7, chunk_size=chunk_size, similarity_window=3)
    # To be tested before being added to the UI. An embedding model is REQUIRED!
    if mode == "Late":
        return LateChunker(embedding_model=tokenizer, chunk_size=chunk_size, min_characters_per_chunk=24)
    if mode == "Code":
        return CodeChunker("blabla")

    # List the mode strings that are actually accepted above.
    raise ValueError(
        f"Invalid mode: {mode}. Valid modes are: "
        "tokens, words, sentence, markdown, Recursive, Semantic, Late, Code"
    )


def create_chunks(table, column_name, tokenizer="character", chunk_size=300, chunk_overlap=100, mode="tokens",
                  progress_callback=None, argself=None):
    """
    Chunk the text in `column_name` of an Orange Table using a specialized chunker.

    Each input row is split into chunks according to `mode`; the output table
    contains one row per chunk, carrying the attributes, class values and metas
    of the source row plus six new meta columns. Rows whose text produces no
    chunk do not appear in the output.

    Parameters:
        table (Table): Input data table.
        column_name (str): Name of the text column to chunk.
        tokenizer (str): Currently ignored: it is overwritten with "character"
            for compatibility while the widget is being updated.
        chunk_size (int): Target chunk size.
        chunk_overlap (int): Overlap between chunks (not used in all modes).
        mode (str): Chunking strategy: "tokens", "words", "sentence", "markdown",
            "Recursive", "Semantic", "Late" or "Code".
        progress_callback (callable): Optional; called after each input row with
            the progress as a float percentage.
        argself: Optional caller reference; when its `stop` attribute is truthy,
            processing stops after the current row and the rows built so far are returned.

    Returns:
        Table: A table with the added meta columns "Chunks", "Chunks size",
        "Chunks index", "Chunks start", "Chunks end" and "Metadata".

    Raises:
        ValueError: If `mode` is not a supported chunking strategy.
    """
    print("This widget is being updated : default tokenizer 'character' enabled for compatibility !!")
    tokenizer = "character"

    # Select the chunking function according to the mode
    chunker = _build_chunker(mode, tokenizer, chunk_size, chunk_overlap)

    new_metas = list(table.domain.metas) + [StringVariable("Chunks"),
                                            ContinuousVariable("Chunks size"),
                                            ContinuousVariable("Chunks index"),
                                            ContinuousVariable("Chunks start"),
                                            ContinuousVariable("Chunks end"),
                                            StringVariable("Metadata")]
    new_domain = Domain(table.domain.attributes, table.domain.class_vars, new_metas)

    # Loop invariants
    attributes = table.domain.attributes
    class_vars = table.domain.class_vars
    total_rows = len(table)

    new_rows = []
    for i, row in enumerate(table):
        content = row[column_name].value
        # Values copied unchanged from the source row into every chunk row
        base_values = [row[x] for x in attributes] + [row[y] for y in class_vars]
        previous_metas = list(row.metas)

        for j, chunk in enumerate(chunker(content)):
            # Previous metas followed by the six chunk-specific metas
            new_metas_values = previous_metas + [chunk.text,
                                                 chunk.token_count,
                                                 j,  # Chunks index
                                                 chunk.start_index,
                                                 chunk.end_index,
                                                 ""]  # Metadata
            new_rows.append(Orange.data.Instance(new_domain, base_values + new_metas_values))

        if progress_callback is not None:
            progress_callback(float(100 * (i + 1) / total_rows))
        if argself is not None and argself.stop:
            break

    return Table.from_list(domain=new_domain, rows=new_rows)
@@ -0,0 +1,333 @@
1
+ # wordchunker_deprecated.py
2
+ # -----------------------------------------------------------------------------
3
+ # Compatibility shim reproducing chonkie==0.4.1 WordChunker behavior (including
4
+ # historical quirks/bugs), while being usable inside chonkie>=1.x pipelines.
5
+ #
6
+ # What you get:
7
+ # - A local WordChunker class with the same logic as chonkie 0.4.1 word chunker.
8
+ # - Preserves newlines: words are spans like r"(\s*\S+)" joined with "".
9
+ # - chunk_size / chunk_overlap are TOKEN budgets (not "word" counts), measured
10
+ # via the provided tokenizer backend.
11
+ # - Reproduces 0.4.1 overlap-loop quirk: iterates range(previous_chunk_length)
12
+ # where previous_chunk_length is a TOKEN count (not word count).
13
+ # - Reproduces 0.4.1 final-chunk quirk: _create_chunk called without current_index
14
+ # (defaults to 0), which can yield start_index via .find from the start.
15
+ #
16
+ # Added for practicality in your AAIT / chonkie>=1.5.2 integration:
17
+ # - WordChunker is callable: chunker(text) == chunker.chunk(text)
18
+ # - Accepts tokenizer="character" (character-count pseudo tokenizer)
19
+ # - Accepts a SentenceTransformer instance as tokenizer (uses .tokenizer underneath)
20
+ #
21
+ # NOTE (important bugfix vs earlier draft):
22
+ # Many HuggingFace tokenizers are *callable* (tokenizer(text) -> BatchEncoding).
23
+ # We must NOT mis-detect them as a generic callable token counter. We detect
24
+ # transformers/tokenizers backends *first* by attributes, then fall back to callable.
25
+ # -----------------------------------------------------------------------------
26
+
27
+ from __future__ import annotations
28
+
29
+ import importlib
30
+ import inspect
31
+ import re
32
+ from dataclasses import dataclass
33
+ from typing import Any, Callable, List, Union
34
+
35
+
36
+ @dataclass(frozen=True)
37
+ class Chunk:
38
+ text: str
39
+ start_index: int
40
+ end_index: int
41
+ token_count: int
42
+
43
+
44
+ # -----------------------------
45
+ # Helpers for robust tokenizers
46
+ # -----------------------------
47
+
48
+ def _unwrap_possible_sentence_transformer(obj: Any) -> Any:
49
+ """
50
+ If `obj` looks like a SentenceTransformer, return its underlying HF tokenizer
51
+ when available. SentenceTransformer usually has `.tokenizer`.
52
+ """
53
+ if obj is None:
54
+ return obj
55
+ try:
56
+ if hasattr(obj, "tokenizer") and "SentenceTransformer" in type(obj).__name__:
57
+ tok = getattr(obj, "tokenizer", None)
58
+ if tok is not None:
59
+ return tok
60
+ except Exception:
61
+ pass
62
+ return obj
63
+
64
+
65
+ def _make_character_tokenizer() -> Any:
66
+ """
67
+ Special compatibility: when user passes tokenizer="character", count tokens
68
+ as characters, and support batch encoding.
69
+ """
70
+ class _CharTokenizer:
71
+ def encode(self, text: str):
72
+ return list(range(len(text)))
73
+
74
+ def encode_batch(self, texts: List[str]):
75
+ return [self.encode(t) for t in texts]
76
+
77
+ return _CharTokenizer()
78
+
79
+
80
+ # -----------------------------
81
+ # Minimal BaseChunker (0.4.1-ish)
82
+ # -----------------------------
83
+
84
+ class BaseChunker:
85
+ """
86
+ Minimal subset of chonkie.chunker.base.BaseChunker needed by WordChunker 0.4.1
87
+ """
88
+
89
+ def __init__(self, tokenizer_or_token_counter: Union[str, Any, Callable[[str], int]]):
90
+ tokenizer_or_token_counter = _unwrap_possible_sentence_transformer(tokenizer_or_token_counter)
91
+
92
+ if tokenizer_or_token_counter == "character":
93
+ tokenizer_or_token_counter = _make_character_tokenizer()
94
+
95
+ if isinstance(tokenizer_or_token_counter, str):
96
+ self.tokenizer = self._load_tokenizer(tokenizer_or_token_counter)
97
+ else:
98
+ self.tokenizer = tokenizer_or_token_counter
99
+
100
+ self._tokenizer_backend = self._get_tokenizer_backend()
101
+ self.token_counter = self._get_tokenizer_counter()
102
+
103
+ def _get_tokenizer_backend(self) -> str:
104
+ t = self.tokenizer
105
+ tname = type(t).__name__
106
+ ttype = str(type(t))
107
+
108
+ # 1) transformers-style tokenizer (callable, returns BatchEncoding, has encode, often batch_encode_plus)
109
+ if hasattr(t, "batch_encode_plus") or "transformers" in ttype or "PreTrainedTokenizer" in tname:
110
+ return "transformers"
111
+
112
+ # 2) tokenizers rust-style (has encode_batch with add_special_tokens)
113
+ if "tokenizers" in ttype:
114
+ return "tokenizers"
115
+
116
+ # 3) tiktoken encodings
117
+ if "tiktoken" in ttype:
118
+ return "tiktoken"
119
+
120
+ # 4) our custom / other encoders that implement encode_batch
121
+ if hasattr(t, "encode_batch"):
122
+ return "encode_batch"
123
+
124
+ # 5) basic encode-only objects
125
+ if hasattr(t, "encode"):
126
+ return "encode_only"
127
+
128
+ # 6) finally: a callable token *counter* function: f(text)->int
129
+ # (must be LAST so we don't mis-detect HF tokenizers as callable counters)
130
+ if callable(t) or inspect.isfunction(t) or inspect.ismethod(t):
131
+ return "callable_counter"
132
+
133
+ raise ValueError(f"Tokenizer backend {ttype} not supported")
134
+
135
+ def _load_tokenizer(self, tokenizer_name: str):
136
+ # Same overall strategy as 0.4.1: try tiktoken -> autotiktokenizer -> tokenizers -> transformers
137
+ try:
138
+ if importlib.util.find_spec("tiktoken") is not None:
139
+ from tiktoken import get_encoding
140
+ return get_encoding(tokenizer_name)
141
+ raise RuntimeError("tiktoken not available")
142
+ except Exception:
143
+ try:
144
+ if importlib.util.find_spec("autotiktokenizer") is not None:
145
+ from autotiktokenizer import AutoTikTokenizer
146
+ return AutoTikTokenizer.from_pretrained(tokenizer_name)
147
+ raise RuntimeError("autotiktokenizer not available")
148
+ except Exception:
149
+ try:
150
+ if importlib.util.find_spec("tokenizers") is not None:
151
+ from tokenizers import Tokenizer
152
+ return Tokenizer.from_pretrained(tokenizer_name)
153
+ raise RuntimeError("tokenizers not available")
154
+ except Exception:
155
+ if importlib.util.find_spec("transformers") is not None:
156
+ from transformers import AutoTokenizer
157
+ return AutoTokenizer.from_pretrained(tokenizer_name)
158
+ raise ValueError(
159
+ "Tokenizer not found in: transformers, tokenizers, autotiktokenizer, tiktoken. "
160
+ "Install one of these."
161
+ )
162
+
163
+ def _get_tokenizer_counter(self) -> Callable[[str], int]:
164
+ t = self.tokenizer
165
+ if self._tokenizer_backend == "transformers":
166
+ return lambda text: len(t.encode(text, add_special_tokens=False))
167
+ if self._tokenizer_backend == "tokenizers":
168
+ return lambda text: len(t.encode(text, add_special_tokens=False).ids)
169
+ if self._tokenizer_backend == "tiktoken":
170
+ return lambda text: len(t.encode(text))
171
+ if self._tokenizer_backend == "encode_batch":
172
+ if hasattr(t, "encode"):
173
+ return lambda text: len(t.encode(text))
174
+ return lambda text: len(t.encode_batch([text])[0])
175
+ if self._tokenizer_backend == "encode_only":
176
+ return lambda text: len(t.encode(text))
177
+ if self._tokenizer_backend == "callable_counter":
178
+ return t # type: ignore[return-value]
179
+ raise ValueError("Tokenizer backend not supported for token counting")
180
+
181
+ def _encode_batch(self, texts: List[str]) -> List[List[int]]:
182
+ """
183
+ Return list of token-id lists. Only lengths are used by WordChunker.
184
+ """
185
+ t = self.tokenizer
186
+ if self._tokenizer_backend == "transformers":
187
+ # batch_encode_plus exists on most HF tokenizers; if not, fall back to __call__
188
+ if hasattr(t, "batch_encode_plus"):
189
+ return t.batch_encode_plus(texts, add_special_tokens=False)["input_ids"]
190
+ # Fallback: tokenizer(texts, add_special_tokens=False) -> BatchEncoding with input_ids
191
+ enc = t(texts, add_special_tokens=False)
192
+ return enc["input_ids"]
193
+ if self._tokenizer_backend == "tokenizers":
194
+ return [e.ids for e in t.encode_batch(texts, add_special_tokens=False)]
195
+ if self._tokenizer_backend == "tiktoken":
196
+ return t.encode_batch(texts)
197
+ if self._tokenizer_backend == "encode_batch":
198
+ return t.encode_batch(texts)
199
+ if self._tokenizer_backend == "encode_only":
200
+ return [t.encode(x) for x in texts]
201
+ if self._tokenizer_backend == "callable_counter":
202
+ # emulate "ids" with dummy list of length == token_count
203
+ out: List[List[int]] = []
204
+ for x in texts:
205
+ n = int(t(x))
206
+ out.append(list(range(n)))
207
+ return out
208
+ raise ValueError(f"Tokenizer backend {self._tokenizer_backend} not supported.")
209
+
210
+
211
+ # -----------------------------
212
+ # WordChunker (exact 0.4.1 logic)
213
+ # -----------------------------
214
+
215
+ class WordChunker(BaseChunker):
216
+ """
217
+ Exact port of chonkie==0.4.1 WordChunker (chunker/word.py), with identical behavior/quirks.
218
+ """
219
+
220
+ def __init__(
221
+ self,
222
+ tokenizer: Union[str, Any] = "gpt2",
223
+ chunk_size: int = 512,
224
+ chunk_overlap: int = 128,
225
+ ):
226
+ super().__init__(tokenizer)
227
+
228
+ if chunk_size <= 0:
229
+ raise ValueError("chunk_size must be positive")
230
+ if chunk_overlap >= chunk_size:
231
+ raise ValueError("chunk_overlap must be less than chunk_size")
232
+
233
+ self.chunk_size = chunk_size
234
+ self.chunk_overlap = chunk_overlap
235
+
236
+ def _split_into_words(self, text: str) -> List[str]:
237
+ split_points = [match.end() for match in re.finditer(r"(\s*\S+)", text)]
238
+ words: List[str] = []
239
+ prev = 0
240
+
241
+ for point in split_points:
242
+ words.append(text[prev:point])
243
+ prev = point
244
+
245
+ if prev < len(text):
246
+ words.append(text[prev:])
247
+
248
+ return words
249
+
250
+ def _create_chunk(
251
+ self,
252
+ words: List[str],
253
+ text: str,
254
+ token_count: int,
255
+ current_index: int = 0,
256
+ ) -> Chunk:
257
+ chunk_text = "".join(words)
258
+ start_index = text.find(chunk_text, current_index)
259
+ return Chunk(
260
+ text=chunk_text,
261
+ start_index=start_index,
262
+ end_index=start_index + len(chunk_text),
263
+ token_count=token_count,
264
+ )
265
+
266
+ def _get_word_list_token_counts(self, words: List[str]) -> List[int]:
267
+ words = [word for word in words if word != ""]
268
+ encodings = self._encode_batch(words)
269
+ return [len(encoding) for encoding in encodings]
270
+
271
+ def chunk(self, text: str) -> List[Chunk]:
272
+ if not text or not text.strip():
273
+ return []
274
+
275
+ words = self._split_into_words(text)
276
+ lengths = self._get_word_list_token_counts(words)
277
+ chunks: List[Chunk] = []
278
+
279
+ current_chunk: List[str] = []
280
+ current_chunk_length = 0
281
+ current_index = 0
282
+
283
+ for i, (word, length) in enumerate(zip(words, lengths)):
284
+ if current_chunk_length + length <= self.chunk_size:
285
+ current_chunk.append(word)
286
+ current_chunk_length += length
287
+ else:
288
+ chunk = self._create_chunk(current_chunk, text, current_chunk_length, current_index)
289
+ chunks.append(chunk)
290
+
291
+ previous_chunk_length = current_chunk_length
292
+ current_index = chunk.end_index
293
+
294
+ overlap: List[str] = []
295
+ overlap_length = 0
296
+
297
+ # Quirk/bug-compatible loop: previous_chunk_length is token count
298
+ for j in range(0, previous_chunk_length):
299
+ cwi = i - 1 - j
300
+ if cwi < 0:
301
+ break
302
+ oword = words[cwi]
303
+ olength = lengths[cwi]
304
+ if overlap_length + olength <= self.chunk_overlap:
305
+ overlap.append(oword)
306
+ overlap_length += olength
307
+ else:
308
+ break
309
+
310
+ current_chunk = [w for w in reversed(overlap)]
311
+ current_chunk_length = overlap_length
312
+
313
+ current_chunk.append(word)
314
+ current_chunk_length += length
315
+
316
+ if current_chunk:
317
+ # Quirk/bug-compatible: current_index not passed (defaults to 0)
318
+ chunk = self._create_chunk(current_chunk, text, current_chunk_length)
319
+ chunks.append(chunk)
320
+
321
+ return chunks
322
+
323
+ def __call__(self, text: str) -> List[Chunk]:
324
+ return self.chunk(text)
325
+
326
+ def __repr__(self) -> str:
327
+ return f"WordChunker(chunk_size={self.chunk_size}, chunk_overlap={self.chunk_overlap})"
328
+
329
+
330
+ def chunk_words(content: str, tokenizer: Any, chunk_size: int = 300, chunk_overlap: int = 100):
331
+ chunker = WordChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
332
+ chunks = chunker.chunk(content)
333
+ return [c.text for c in chunks], []
@@ -1473,6 +1473,48 @@ def create_trigger_table():
1473
1473
  return table
1474
1474
 
1475
1475
 
1476
+ def ensure_file_exists_recursive(path_holder: list[str]) -> bool:
1477
+ """
1478
+ Vérifie l'existence d'un fichier.
1479
+
1480
+ - Si le chemin absolu existe → True
1481
+ - Sinon, cherche récursivement le fichier dans le dossier parent
1482
+ - Si trouvé → met à jour path_holder[0] avec le nouveau chemin absolu → True
1483
+ - Sinon → False (sans modifier path_holder)
1484
+
1485
+ Args:
1486
+ path_holder (list[str]): liste mutable contenant le chemin absolu du fichier
1487
+
1488
+ Returns:
1489
+ bool
1490
+ """
1491
+ if not path_holder or not path_holder[0]:
1492
+ return False
1493
+
1494
+ original_path = Path(path_holder[0])
1495
+
1496
+ # 1️⃣ Cas simple : le fichier existe déjà
1497
+ if original_path.is_file():
1498
+ return True
1499
+
1500
+ parent_dir = original_path.parent
1501
+ filename = original_path.name
1502
+
1503
+ # 2️⃣ Sécurité minimale
1504
+ if not parent_dir.is_dir():
1505
+ return False
1506
+
1507
+ # 3️⃣ Recherche récursive dans les sous-dossiers
1508
+ for found in parent_dir.rglob(filename):
1509
+ if found.is_file():
1510
+ path_holder[0] = str(found.resolve())
1511
+ return True
1512
+
1513
+ # 4️⃣ Rien trouvé → ne pas modifier le chemin
1514
+ return False
1515
+
1516
+
1517
+
1476
1518
  if __name__ == "__main__":
1477
1519
  # avant faire un bouton
1478
1520
  # set_aait_store_remote_ressources_path(ressource_path)
@@ -54,6 +54,14 @@ class BaseListWidget(widget.OWWidget, openclass=True):
54
54
  self.var_selector.add_variables(self.data.domain)
55
55
  self.var_selector.select_variable_by_name(self.selected_column_name)
56
56
  ```
57
+ 7. You can use
58
+ autorun = Setting(True)
59
+ and self.autorun = True / self.autorun = False
60
+ to control whether the process is triggered automatically when input data is received.
61
+
62
+ 8. self.data is the input data stream. Make sure to create
63
+ new_data = self.data.copy()
64
+ to avoid issues when the process is triggered several times via a push button.
57
65
  """
58
66
  # Settings
59
67
  selected_column_name = Setting("Default") # set the targeted column by default as "Default"
@@ -91,8 +99,8 @@ class BaseListWidget(widget.OWWidget, openclass=True):
91
99
 
92
100
  # Data management
93
101
  self.data = None
94
- self.autorun = True
95
-
102
+ # If self.autorun exists, it is preserved. Otherwise, it is set to True.
103
+ self.autorun = getattr(self, "autorun", True)
96
104
 
97
105
  def on_variable_selected(self, var_name):
98
106
  """Update the selected column when the user clicks an item."""