aait 2.1.2__tar.gz → 2.1.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (271)
  1. aait-2.1.2.1/License.txt +6 -0
  2. aait-2.1.2.1/PKG-INFO +28 -0
  3. aait-2.1.2.1/aait.egg-info/PKG-INFO +28 -0
  4. {aait-2.1.2 → aait-2.1.2.1}/aait.egg-info/SOURCES.txt +20 -6
  5. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/chunking.py +38 -33
  6. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/process_documents.py +100 -71
  7. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/MetManagement.py +2 -2
  8. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/mac_utils.py +0 -8
  9. aait-2.1.2.1/orangecontrib/AAIT/utils/tools/first_time_check.py +14 -0
  10. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/windows_utils.py +0 -45
  11. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWAddColumns.py +23 -6
  12. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWConverseLLM.py +1 -1
  13. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWExecuteScript.py +4 -0
  14. aait-2.1.2.1/orangecontrib/AAIT/widgets/OWFileMetadata.py +183 -0
  15. aait-2.1.2.1/orangecontrib/AAIT/widgets/OWFileSyncChecker.py +282 -0
  16. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWFindFilesFromDir.py +41 -6
  17. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWGenerateQuestions.py +1 -1
  18. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWGenerateSynthesis.py +1 -1
  19. aait-2.1.2.1/orangecontrib/AAIT/widgets/OWGenerateWord.py +483 -0
  20. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWInputSelector.py +6 -9
  21. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWLemmatizer.py +1 -1
  22. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen3B_Q4.py +4 -1
  23. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q6.py +4 -1
  24. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_EN.py +1 -1
  25. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_FR.py +1 -1
  26. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWProcessDocumentsFromPath.py +1 -1
  27. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWQueryLLM.py +1 -1
  28. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWReranking.py +1 -1
  29. aait-2.1.2.1/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +133 -0
  30. aait-2.1.2.1/orangecontrib/AAIT/widgets/designer/owfilemetadata.ui +41 -0
  31. aait-2.1.2.1/orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui +43 -0
  32. aait-2.1.2.1/orangecontrib/AAIT/widgets/designer/owgenerate_word.ui +54 -0
  33. aait-2.1.2.1/orangecontrib/AAIT/widgets/icons/owfilemetadata.svg +4 -0
  34. aait-2.1.2.1/orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg +45 -0
  35. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/processdocuments.svg +12 -0
  36. {aait-2.1.2 → aait-2.1.2.1}/setup.py +1 -1
  37. aait-2.1.2.1/tests/test_class_values_context_handler.py +75 -0
  38. aait-2.1.2.1/tests/test_credentials.py +76 -0
  39. aait-2.1.2.1/tests/test_domain_context_handler.py +401 -0
  40. aait-2.1.2.1/tests/test_gui.py +140 -0
  41. aait-2.1.2.1/tests/test_matplotlib_export.py +43 -0
  42. aait-2.1.2.1/tests/test_perfect_domain_context_handler.py +148 -0
  43. aait-2.1.2.1/tests/test_scatterplot_density.py +59 -0
  44. aait-2.1.2.1/tests/test_settings_handler.py +27 -0
  45. aait-2.1.2.1/tests/test_widgets_outputs.py +29 -0
  46. aait-2.1.2.1/tests/test_workflows.py +80 -0
  47. aait-2.1.2/PKG-INFO +0 -8
  48. aait-2.1.2/aait.egg-info/PKG-INFO +0 -8
  49. aait-2.1.2/orangecontrib/AAIT/llm/GPT4ALL.py +0 -613
  50. aait-2.1.2/orangecontrib/AAIT/llm/GPT4ALL_killer.py +0 -68
  51. aait-2.1.2/orangecontrib/AAIT/utils/tools/first_time_check.py +0 -14
  52. aait-2.1.2/orangecontrib/AAIT/widgets/OWLLM4ALL.py +0 -304
  53. aait-2.1.2/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +0 -107
  54. aait-2.1.2/orangecontrib/AAIT/widgets/designer/owllm4all.ui +0 -70
  55. aait-2.1.2/orangecontrib/AAIT/widgets/icons/llm4all.svg +0 -72
  56. {aait-2.1.2 → aait-2.1.2.1}/aait.egg-info/dependency_links.txt +0 -0
  57. {aait-2.1.2 → aait-2.1.2.1}/aait.egg-info/entry_points.txt +0 -0
  58. {aait-2.1.2 → aait-2.1.2.1}/aait.egg-info/namespace_packages.txt +0 -0
  59. {aait-2.1.2 → aait-2.1.2.1}/aait.egg-info/requires.txt +0 -0
  60. {aait-2.1.2 → aait-2.1.2.1}/aait.egg-info/top_level.txt +0 -0
  61. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/__init__.py +0 -0
  62. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/SignalReceiver.py +0 -0
  63. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/__init__.py +0 -0
  64. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/audit_widget.py +0 -0
  65. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (2).TIF +0 -0
  66. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (3).TIF +0 -0
  67. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (4).TIF +0 -0
  68. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0001.csv +0 -0
  69. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0002.csv +0 -0
  70. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0003.csv +0 -0
  71. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dynamic_results.py +0 -0
  72. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/test_all_widgets.py +0 -0
  73. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/test_server.py +0 -0
  74. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_4all.py +0 -0
  75. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_chunking.py +0 -0
  76. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_edit_table.py +0 -0
  77. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_mpnet_create_embeddings.py +0 -0
  78. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_optimisation.py +0 -0
  79. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_optimisationselection.py +0 -0
  80. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_powfactory.py +0 -0
  81. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_queryllm.py +0 -0
  82. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_spacy_md_fr_lemmatizer.py +0 -0
  83. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_traduction.py +0 -0
  84. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widgets_model.py +0 -0
  85. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/encapsulation/__init__.py +0 -0
  86. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/fix_torch/fix_torch_dll_error.py +0 -0
  87. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/fix_torch/libomp140.x86_64.dll +0 -0
  88. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/MergeBaseLora.py +0 -0
  89. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/SERV_kill_workflow.py +0 -0
  90. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/SERV_requests.py +0 -0
  91. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/SERV_start_workflow.py +0 -0
  92. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/Tutorial_Finetuning.py +0 -0
  93. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/Tutorial_TestFinetuning.py +0 -0
  94. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/__init__.py +0 -0
  95. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/answers.py +0 -0
  96. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/embeddings.py +0 -0
  97. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/finetuning.py +0 -0
  98. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/functions_DatasetGeneration.py +0 -0
  99. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/functions_Finetuning.py +0 -0
  100. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/lemmes.py +0 -0
  101. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/lmstudio.py +0 -0
  102. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/main_DatasetGeneration.py +0 -0
  103. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/main_Finetuning.py +0 -0
  104. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/prompt_management.py +0 -0
  105. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/quickpy.py +0 -0
  106. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/test.py +0 -0
  107. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/test_functions.py +0 -0
  108. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/llm/translations.py +0 -0
  109. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/optimiser/__init__.py +0 -0
  110. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/optimiser/optuna_multi.py +0 -0
  111. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/CheckMetaData.py +0 -0
  112. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/SimpleDialogQt.py +0 -0
  113. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/__init__.py +0 -0
  114. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/aait_repo_file.py +0 -0
  115. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/check_data_in.py +0 -0
  116. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/delta_local_shared_fodler.py +0 -0
  117. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/import_uic.py +0 -0
  118. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/initialize_from_ini.py +0 -0
  119. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/shared_functions.py +0 -0
  120. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/shared_variables.py +0 -0
  121. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/subprocess_management.py +0 -0
  122. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/thread_management.py +0 -0
  123. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/tools/__init__.py +0 -0
  124. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/tools/change_owcorpus.py +0 -0
  125. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py +0 -0
  126. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/utils/tools/owcorpus_ok.txt +0 -0
  127. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWAAITResourcesManager.py +0 -0
  128. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWApplyRules.py +0 -0
  129. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWCN2rule_view.py +0 -0
  130. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWChunking.py +0 -0
  131. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWConcatRules.py +0 -0
  132. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWCreateEmbeddings.py +0 -0
  133. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWDisplayMD.py +0 -0
  134. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWEditTable.py +0 -0
  135. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWEndLoop.py +0 -0
  136. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWExtraChunks.py +0 -0
  137. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWExtractTokens.py +0 -0
  138. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWFileWithPath.py +0 -0
  139. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWKeywords.py +0 -0
  140. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWLMStudio.py +0 -0
  141. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWLanguageDetection.py +0 -0
  142. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_CE_MiniLML6.py +0 -0
  143. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Falcon.py +0 -0
  144. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_HelsinkiEnFr.py +0 -0
  145. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_HelsinkiFrEn.py +0 -0
  146. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_MPNET.py +0 -0
  147. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Mistral.py +0 -0
  148. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen.py +0 -0
  149. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen1B5_Q6.py +0 -0
  150. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen2_5_32B.py +0 -0
  151. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q4.py +0 -0
  152. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Solar.py +0 -0
  153. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_SolarUncensored.py +0 -0
  154. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWOptimisation.py +0 -0
  155. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWOptimisationSelection.py +0 -0
  156. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWOptimisationSendScore.py +0 -0
  157. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWRandomData.py +0 -0
  158. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWSaveFilepathEntry.py +0 -0
  159. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py +0 -0
  160. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWStartLoop.py +0 -0
  161. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWTable2Corpus.py +0 -0
  162. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWTranslation.py +0 -0
  163. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWTrigger.py +0 -0
  164. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWUnzipFolder.py +0 -0
  165. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/POW_Wfactory.py +0 -0
  166. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/__init__.py +0 -0
  167. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owMarkdown.ui +0 -0
  168. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/ow_widget_random_data.ui +0 -0
  169. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owaddcolumns.ui +0 -0
  170. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owapplyrules.ui +0 -0
  171. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owchunking.ui +0 -0
  172. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owconcatrules.ui +0 -0
  173. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owconversellm.ui +0 -0
  174. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owembeddings.ui +0 -0
  175. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owendloop.ui +0 -0
  176. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owexecutescript.ui +0 -0
  177. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owextrachunks.ui +0 -0
  178. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owextracttokens.ui +0 -0
  179. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owfilewithpath.ui +0 -0
  180. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui +0 -0
  181. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui +0 -0
  182. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui +0 -0
  183. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owkeyword.ui +0 -0
  184. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owlangdetect.ui +0 -0
  185. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owlemmatizer.ui +0 -0
  186. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owloadworkflow.ui +0 -0
  187. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui +0 -0
  188. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui +0 -0
  189. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_en_fr.ui +0 -0
  190. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_fr_en.ui +0 -0
  191. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_mistral.ui +0 -0
  192. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_mpnet.ui +0 -0
  193. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_qwen.ui +0 -0
  194. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_solar.ui +0 -0
  195. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_solar_uncensored.ui +0 -0
  196. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_en.ui +0 -0
  197. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_fr.ui +0 -0
  198. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/ownumberpointinrules.ui +0 -0
  199. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owprocessdocuments.ui +0 -0
  200. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owqueryllm.ui +0 -0
  201. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owreranking.ui +0 -0
  202. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owsavewithpath.ui +0 -0
  203. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui +0 -0
  204. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owstartloop.ui +0 -0
  205. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owtable2corpus.ui +0 -0
  206. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owtranslation.ui +0 -0
  207. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owtrigger.ui +0 -0
  208. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owunzipfolder.ui +0 -0
  209. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/CN2RuleViewer.svg +0 -0
  210. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/MDViewer.png +0 -0
  211. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/Mistral.png +0 -0
  212. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/apply_rules.svg +0 -0
  213. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/category.svg +0 -0
  214. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/dark_green.txt +0 -0
  215. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/de.png +0 -0
  216. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/documents.png +0 -0
  217. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/endloop.png +0 -0
  218. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/extra_chunks.png +0 -0
  219. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/in_or_out.png +0 -0
  220. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/input.png +0 -0
  221. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/keyword.png +0 -0
  222. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/languages.png +0 -0
  223. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/lm_studio.png +0 -0
  224. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg +0 -0
  225. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg +0 -0
  226. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_pull.svg +0 -0
  227. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_push.svg +0 -0
  228. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_text_pull.svg +0 -0
  229. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/logo_solar.svg +0 -0
  230. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/logo_uncensoredsolar.svg +0 -0
  231. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/logo_upload.png +0 -0
  232. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/models.png +0 -0
  233. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/optimisation.png +0 -0
  234. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/optimizer.png +0 -0
  235. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/output.png +0 -0
  236. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owCN2_intersect_rules.svg +0 -0
  237. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owchunking.png +0 -0
  238. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owconversellm.svg +0 -0
  239. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owedittable.svg +0 -0
  240. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owembeddings.svg +0 -0
  241. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owenvinfo.png +0 -0
  242. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owexecutescript.svg +0 -0
  243. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owextracttokens.svg +0 -0
  244. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg +0 -0
  245. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owfilewithpath.svg +0 -0
  246. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg +0 -0
  247. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png +0 -0
  248. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owlemmatizer.svg +0 -0
  249. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_ce_minilml6.svg +0 -0
  250. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_falcon.png +0 -0
  251. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_en_fr.svg +0 -0
  252. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_fr_en.svg +0 -0
  253. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_mpnet.svg +0 -0
  254. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_en.svg +0 -0
  255. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_fr.svg +0 -0
  256. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owoptimisation.svg +0 -0
  257. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owoptimisationselection.png +0 -0
  258. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owqueryllm.svg +0 -0
  259. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owreranking.svg +0 -0
  260. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owsavefilepathentry.svg +0 -0
  261. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owtable2corpus.svg +0 -0
  262. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owtranslation.svg +0 -0
  263. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owtrigger.svg +0 -0
  264. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/qwen-color.png +0 -0
  265. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/select_dynamic_row.png +0 -0
  266. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/startloop.png +0 -0
  267. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/tools.png +0 -0
  268. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/widgetFactory.svg +0 -0
  269. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/zip.svg +0 -0
  270. {aait-2.1.2 → aait-2.1.2.1}/orangecontrib/__init__.py +0 -0
  271. {aait-2.1.2 → aait-2.1.2.1}/setup.cfg +0 -0
@@ -0,0 +1,6 @@
1
+ THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT ANY WARRANTY WHATSOEVER.
2
+
3
+ If you use or redistribute this software, you are permitted to do so
4
+ under the terms of GNU [GPL-3.0]+ license.
5
+
6
+ [GPL-3.0]: https://www.gnu.org/licenses/gpl-3.0.en.html
aait-2.1.2.1/PKG-INFO ADDED
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.1
2
+ Name: aait
3
+ Version: 2.1.2.1
4
+ Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
5
+ Home-page:
6
+ Author: Orange community
7
+ Author-email:
8
+ Keywords: orange3 add-on
9
+ License-File: License.txt
10
+ Requires-Dist: torch
11
+ Requires-Dist: sentence-transformers==5.0.0
12
+ Requires-Dist: gpt4all[all]==2.8.2
13
+ Requires-Dist: sacremoses==0.1.1
14
+ Requires-Dist: transformers==4.51.3
15
+ Requires-Dist: sentencepiece==0.2.0
16
+ Requires-Dist: optuna
17
+ Requires-Dist: spacy==3.7.6
18
+ Requires-Dist: markdown
19
+ Requires-Dist: python-multipart
20
+ Requires-Dist: PyMuPDF==1.24.14
21
+ Requires-Dist: chonkie==0.4.1
22
+ Requires-Dist: GPUtil==1.4.0
23
+ Requires-Dist: unidecode==1.3.8
24
+ Requires-Dist: python-docx==1.1.2
25
+ Requires-Dist: psutil
26
+ Requires-Dist: thefuzz==0.22.1
27
+ Requires-Dist: beautifulsoup4==4.12.3
28
+ Requires-Dist: CATEGORIT
@@ -0,0 +1,28 @@
1
+ Metadata-Version: 2.1
2
+ Name: aait
3
+ Version: 2.1.2.1
4
+ Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
5
+ Home-page:
6
+ Author: Orange community
7
+ Author-email:
8
+ Keywords: orange3 add-on
9
+ License-File: License.txt
10
+ Requires-Dist: torch
11
+ Requires-Dist: sentence-transformers==5.0.0
12
+ Requires-Dist: gpt4all[all]==2.8.2
13
+ Requires-Dist: sacremoses==0.1.1
14
+ Requires-Dist: transformers==4.51.3
15
+ Requires-Dist: sentencepiece==0.2.0
16
+ Requires-Dist: optuna
17
+ Requires-Dist: spacy==3.7.6
18
+ Requires-Dist: markdown
19
+ Requires-Dist: python-multipart
20
+ Requires-Dist: PyMuPDF==1.24.14
21
+ Requires-Dist: chonkie==0.4.1
22
+ Requires-Dist: GPUtil==1.4.0
23
+ Requires-Dist: unidecode==1.3.8
24
+ Requires-Dist: python-docx==1.1.2
25
+ Requires-Dist: psutil
26
+ Requires-Dist: thefuzz==0.22.1
27
+ Requires-Dist: beautifulsoup4==4.12.3
28
+ Requires-Dist: CATEGORIT
@@ -1,3 +1,4 @@
1
+ License.txt
1
2
  setup.py
2
3
  aait.egg-info/PKG-INFO
3
4
  aait.egg-info/SOURCES.txt
@@ -34,8 +35,6 @@ orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0003.csv
34
35
  orangecontrib/AAIT/encapsulation/__init__.py
35
36
  orangecontrib/AAIT/fix_torch/fix_torch_dll_error.py
36
37
  orangecontrib/AAIT/fix_torch/libomp140.x86_64.dll
37
- orangecontrib/AAIT/llm/GPT4ALL.py
38
- orangecontrib/AAIT/llm/GPT4ALL_killer.py
39
38
  orangecontrib/AAIT/llm/MergeBaseLora.py
40
39
  orangecontrib/AAIT/llm/SERV_kill_workflow.py
41
40
  orangecontrib/AAIT/llm/SERV_requests.py
@@ -95,13 +94,15 @@ orangecontrib/AAIT/widgets/OWEndLoop.py
95
94
  orangecontrib/AAIT/widgets/OWExecuteScript.py
96
95
  orangecontrib/AAIT/widgets/OWExtraChunks.py
97
96
  orangecontrib/AAIT/widgets/OWExtractTokens.py
97
+ orangecontrib/AAIT/widgets/OWFileMetadata.py
98
+ orangecontrib/AAIT/widgets/OWFileSyncChecker.py
98
99
  orangecontrib/AAIT/widgets/OWFileWithPath.py
99
100
  orangecontrib/AAIT/widgets/OWFindFilesFromDir.py
100
101
  orangecontrib/AAIT/widgets/OWGenerateQuestions.py
101
102
  orangecontrib/AAIT/widgets/OWGenerateSynthesis.py
103
+ orangecontrib/AAIT/widgets/OWGenerateWord.py
102
104
  orangecontrib/AAIT/widgets/OWInputSelector.py
103
105
  orangecontrib/AAIT/widgets/OWKeywords.py
104
- orangecontrib/AAIT/widgets/OWLLM4ALL.py
105
106
  orangecontrib/AAIT/widgets/OWLMStudio.py
106
107
  orangecontrib/AAIT/widgets/OWLanguageDetection.py
107
108
  orangecontrib/AAIT/widgets/OWLemmatizer.py
@@ -150,14 +151,16 @@ orangecontrib/AAIT/widgets/designer/owendloop.ui
150
151
  orangecontrib/AAIT/widgets/designer/owexecutescript.ui
151
152
  orangecontrib/AAIT/widgets/designer/owextrachunks.ui
152
153
  orangecontrib/AAIT/widgets/designer/owextracttokens.ui
154
+ orangecontrib/AAIT/widgets/designer/owfilemetadata.ui
155
+ orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui
153
156
  orangecontrib/AAIT/widgets/designer/owfilewithpath.ui
154
157
  orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui
158
+ orangecontrib/AAIT/widgets/designer/owgenerate_word.ui
155
159
  orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui
156
160
  orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui
157
161
  orangecontrib/AAIT/widgets/designer/owkeyword.ui
158
162
  orangecontrib/AAIT/widgets/designer/owlangdetect.ui
159
163
  orangecontrib/AAIT/widgets/designer/owlemmatizer.ui
160
- orangecontrib/AAIT/widgets/designer/owllm4all.ui
161
164
  orangecontrib/AAIT/widgets/designer/owloadworkflow.ui
162
165
  orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui
163
166
  orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui
@@ -195,7 +198,6 @@ orangecontrib/AAIT/widgets/icons/in_or_out.png
195
198
  orangecontrib/AAIT/widgets/icons/input.png
196
199
  orangecontrib/AAIT/widgets/icons/keyword.png
197
200
  orangecontrib/AAIT/widgets/icons/languages.png
198
- orangecontrib/AAIT/widgets/icons/llm4all.svg
199
201
  orangecontrib/AAIT/widgets/icons/lm_studio.png
200
202
  orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg
201
203
  orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg
@@ -217,7 +219,9 @@ orangecontrib/AAIT/widgets/icons/owembeddings.svg
217
219
  orangecontrib/AAIT/widgets/icons/owenvinfo.png
218
220
  orangecontrib/AAIT/widgets/icons/owexecutescript.svg
219
221
  orangecontrib/AAIT/widgets/icons/owextracttokens.svg
222
+ orangecontrib/AAIT/widgets/icons/owfilemetadata.svg
220
223
  orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg
224
+ orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg
221
225
  orangecontrib/AAIT/widgets/icons/owfilewithpath.svg
222
226
  orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg
223
227
  orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png
@@ -243,4 +247,14 @@ orangecontrib/AAIT/widgets/icons/select_dynamic_row.png
243
247
  orangecontrib/AAIT/widgets/icons/startloop.png
244
248
  orangecontrib/AAIT/widgets/icons/tools.png
245
249
  orangecontrib/AAIT/widgets/icons/widgetFactory.svg
246
- orangecontrib/AAIT/widgets/icons/zip.svg
250
+ orangecontrib/AAIT/widgets/icons/zip.svg
251
+ tests/test_class_values_context_handler.py
252
+ tests/test_credentials.py
253
+ tests/test_domain_context_handler.py
254
+ tests/test_gui.py
255
+ tests/test_matplotlib_export.py
256
+ tests/test_perfect_domain_context_handler.py
257
+ tests/test_scatterplot_density.py
258
+ tests/test_settings_handler.py
259
+ tests/test_widgets_outputs.py
260
+ tests/test_workflows.py
@@ -74,29 +74,43 @@ def chunk_sentences(content, tokenizer, chunk_size=500, chunk_overlap=125):
74
74
 
75
75
  def chunk_markdown(content, tokenizer=None, chunk_size=500, chunk_overlap=125):
76
76
  """
77
- Chunk Markdown based on headers #, ##, ###, etc.
78
- Each chunk's metadata includes only the headers in its hierarchy.
79
- Logs are displayed for debugging.
80
-
81
- Parameters:
82
- content (str): The Markdown content to chunk.
83
- tokenizer: Unused (kept for compatibility).
84
- chunk_size (int): Maximum number of words per chunk.
85
- chunk_overlap (int): Number of words to overlap between chunks.
86
-
87
- Returns:
88
- tuple: (chunks, metadatas) where chunks are text segments and metadatas are header hierarchies.
77
+ Découpe un contenu Markdown en chunks :
78
+ - Si des en-têtes Markdown (#, ##, ###...) existent : on respecte la hiérarchie
79
+ et on inclut dans les métadonnées uniquement les titres de la branche courante.
80
+ - Sinon : on délègue à chunk_words().
81
+
82
+ Parameters
83
+ ----------
84
+ content : str
85
+ Le contenu (Markdown ou texte brut).
86
+ tokenizer : any
87
+ Tokenizer utilisé par WordChunker si besoin.
88
+ chunk_size : int
89
+ Nombre max de mots par chunk.
90
+ chunk_overlap : int
91
+ Overlap (en mots) entre deux chunks consécutifs.
92
+
93
+ Returns
94
+ -------
95
+ (chunks, metadatas) : tuple(list[str], list[str])
96
+ chunks : segments de texte
97
+ metadatas : hiérarchies de titres associées (chaînes " ; " séparées), vide si aucun titre.
89
98
  """
99
+ if not content or not isinstance(content, str):
100
+ return [], []
101
+
90
102
  header_regex = re.compile(r"^(#{1,6})\s+(.*)", re.MULTILINE)
91
103
  matches = list(header_regex.finditer(content))
92
104
 
105
+ # Cas SANS en-têtes : appel direct à chunk_words
93
106
  if not matches:
94
- return [], []
107
+ chunks, _ = chunk_words(content, tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
108
+ return chunks, [""] * len(chunks)
95
109
 
96
- # Extract sections: (level, title, body)
110
+ # Cas AVEC en-têtes : extraire les sections (level, title, body)
97
111
  sections = []
98
112
  for i, match in enumerate(matches):
99
- level = len(match.group(1)) # Number of # symbols
113
+ level = len(match.group(1))
100
114
  title = match.group(2).strip()
101
115
  start = match.end()
102
116
  end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
@@ -104,32 +118,23 @@ def chunk_markdown(content, tokenizer=None, chunk_size=500, chunk_overlap=125):
104
118
  sections.append((level, title, body))
105
119
 
106
120
  chunks, metadatas = [], []
107
- current_titles = {} # Maps header level to title
121
+ current_titles = {}
108
122
 
109
123
  for level, title, body in sections:
110
- # Update current_titles: Keep headers <= current level, clear others
111
- current_titles[level] = title
112
- # Remove headers at deeper levels or same level from previous sections
124
+ # purge les niveaux >= level
113
125
  for l in list(current_titles.keys()):
114
- if l >= level and l != level:
126
+ if l >= level:
115
127
  current_titles.pop(l, None)
128
+ current_titles[level] = title
116
129
 
117
- # Build metadata: Join headers from level 1 to current level
118
- metadata = " ; ".join(current_titles[l] for l in sorted(current_titles) if l <= level)
119
- #print(f"Section: {title} (Level {level}), Metadata: {metadata}") # Debug
130
+ metadata = " ; ".join(current_titles[lvl] for lvl in sorted(current_titles) if lvl <= level)
120
131
 
121
- # Split body into words
122
- words = body.split()
132
+ # déléguer le découpage de body à chunk_words
133
+ body_chunks, _ = chunk_words(body, tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
123
134
 
124
- if len(words) <= chunk_size:
125
- chunks.append(body)
135
+ for ch in body_chunks:
136
+ chunks.append(ch)
126
137
  metadatas.append(metadata)
127
- else:
128
- for i in range(0, len(words), chunk_size - chunk_overlap):
129
- chunk = " ".join(words[i:i + chunk_size])
130
- chunks.append(chunk)
131
- metadatas.append(metadata)
132
- #print(f"Chunk: {chunk[:50]}..., Metadata: {metadata}") # Debug
133
138
 
134
139
  return chunks, metadatas
135
140
 
@@ -1,38 +1,45 @@
1
1
  import os
2
2
  import json
3
3
  import ntpath
4
+ from pathlib import Path
4
5
 
5
- from Orange.data import Table, Domain, StringVariable
6
+ from Orange.data import Table, Domain, StringVariable, ContinuousVariable
6
7
 
7
8
  import fitz
8
9
  import docx
9
10
 
10
11
 
11
-
12
12
  def process_documents(dirpath):
13
- if dirpath is None or not os.path.exists(dirpath):
13
+ if dirpath is None or not Path(dirpath).exists():
14
14
  return None, None
15
15
 
16
+ # Normalize dirpath
17
+ dirpath = Path(dirpath).resolve()
18
+
16
19
  # get path from user selection
17
20
  embeddings = check_for_embeddings(dirpath)
18
- dirpath = dirpath.replace("\\","/")
19
21
 
20
22
  # Set selected path in the saved embeddings
21
23
  if embeddings is not None:
22
- common_path = find_common_root(embeddings).replace("\\","/")
24
+ common_path = Path(find_common_root(embeddings)).resolve()
23
25
  for row in embeddings:
24
- row["path"] = row["path"].value.replace("\\","/").replace(common_path, dirpath)
26
+ row_path = Path(str(row["path"].value)).resolve()
27
+ # Replace common root with the current dirpath
28
+ try:
29
+ row["path"] = str(dirpath / row_path.relative_to(common_path))
30
+ except ValueError:
31
+ # If relative_to fails (paths not matching), just normalize
32
+ row["path"] = str(row_path)
25
33
 
26
34
  # Verify which files are already processed
27
35
  files_to_process = get_files_to_process(dirpath, embeddings)
28
36
 
29
37
  rows = []
30
38
  for file in files_to_process:
31
- # Get the text content from the file
32
- content = extract_text(file)
33
- filename = ntpath.basename(file)
34
- # Build a row containing dirpath | filename | content
35
- row = [file, filename, content]
39
+ file = Path(file).resolve()
40
+ content = extract_text(str(file)) # extractor may expect string
41
+ filename = file.name
42
+ row = [str(file), filename, content] # store strings in Orange table
36
43
  rows.append(row)
37
44
 
38
45
  # Build a table with the constructed rows
@@ -44,93 +51,115 @@ def process_documents(dirpath):
44
51
  return out_data, embeddings
45
52
 
46
53
 
54
+ def check_for_embeddings(folder_path):
55
+ """
56
+ Check for an embeddings.pkl file in a given folder. Return its content if it exists.
57
+
58
+ Parameters:
59
+ folder_path (str | Path): The path to the folder where embeddings.pkl may exist.
60
+
61
+ Returns:
62
+ Table or None: The content of embeddings.pkl, or None if not found.
63
+ """
64
+ folder_path = Path(folder_path).resolve()
65
+
66
+ filepaths = [
67
+ folder_path / "embeddings_question.pkl",
68
+ folder_path / "embeddings.pkl"
69
+ ]
70
+
71
+ for filepath in filepaths:
72
+ if filepath.exists():
73
+ return Table.from_file(str(filepath)) # Table.from_file expects a str
74
+ return None
75
+
76
+
47
77
  def find_common_root(data_table, column_name="path"):
48
78
  """Finds the common root path from a column of file paths in an Orange Data Table."""
49
- paths = [str(row[column_name]) for row in data_table if row[column_name] is not None]
79
+ paths = [Path(str(row[column_name].value)).resolve()
80
+ for row in data_table if row[column_name] is not None]
50
81
  if not paths:
51
82
  return ""
52
- return os.path.commonpath(paths)
83
+ return str(Path(os.path.commonpath(paths)).resolve())
53
84
 
54
85
 
55
86
  def get_files_to_process(folder_path, table=None):
56
87
  """
57
- Finds all PDF files in a folder (including subfolders) that are not already in the table.
58
- The comparison is based on "name" (relative path from the main folder) instead of full paths.
88
+ Finds all PDF/DOCX files in a folder (including subfolders) that are not already in the table
89
+ or that have changed since last check (based on file size).
59
90
 
60
91
  :param folder_path: Path to the folder to scan for documents.
61
- :param table: Orange Data Table with columns "path", "name", and "content".
62
- :return: List of paths to files not present in the table (by name, including subfolder structure).
92
+ :param table: Orange Data Table with column "path".
93
+ :return: List of new/changed file paths.
63
94
  """
64
- #TODO
65
- # Supported file extensions
66
- supported_extensions = [".pdf", ".docx"]
95
+ supported_extensions = {".pdf", ".docx"}
67
96
 
68
- # Read the json containing file sizes
69
- filepath_sizes = os.path.join(folder_path, "sizes.json")
70
- if os.path.exists(filepath_sizes):
97
+ folder_path = Path(folder_path).resolve()
98
+ filepath_sizes = folder_path / "sizes.json"
99
+
100
+ print(filepath_sizes)
101
+ print(filepath_sizes.exists())
102
+
103
+ # Load previous file sizes
104
+ if filepath_sizes.exists():
71
105
  with open(filepath_sizes, "r") as json_file:
72
- sizes = json.load(json_file)
106
+ sizes = {Path(k): v for k, v in json.load(json_file).items()}
73
107
  else:
74
- sizes = dict()
108
+ sizes = {}
75
109
 
76
- # Extract the existing file names from the Orange Data Table
110
+ print(sizes)
111
+
112
+ # Extract the existing paths from the Orange Data Table
77
113
  if table:
78
- existing_paths = set(table[:, "path"].metas.flatten()) # Extract names from the table
114
+ # Orange stores metas as strings turn them into Paths
115
+ existing_paths = {Path(str(p)).resolve() for p in table[:, "path"].metas.flatten()}
79
116
  else:
80
117
  existing_paths = set()
81
118
 
82
-
83
- # Walk through the folder and its subfolders
84
119
  new_files = []
85
- for root, _, files in os.walk(folder_path):
86
- for file in files:
87
- # Check if the file has a supported extension
88
- if os.path.splitext(file)[1].lower() in supported_extensions:
89
- # Add the file if it is not already in the table
90
- filepath = os.path.join(root, file).replace("\\","/")
91
- if filepath not in existing_paths:
92
- new_files.append(filepath)
93
- sizes[filepath] = os.path.getsize(filepath)
94
- # If the file is in the table, verify if the file has been modified (comparing the size)
95
- else:
96
- new_size = os.path.getsize(filepath)
97
- if filepath not in sizes.keys():
98
- sizes[filepath] = new_size
99
- else:
100
- old_size = sizes[filepath]
101
- if old_size != new_size:
102
- new_files.append(filepath)
103
- table = remove_from_table(filepath, table)
104
- sizes[filepath] = new_size
105
- with open(filepath_sizes, "w") as json_file:
106
- json.dump(sizes, json_file, indent=4)
107
- return new_files
108
120
 
121
+ # Walk through the folder and subfolders
122
+ for file in folder_path.rglob("*"):
123
+ if file.suffix.lower() in supported_extensions:
124
+ file = file.resolve()
125
+ size = file.stat().st_size
109
126
 
110
- def remove_from_table(filepath, table):
111
- filtered_table = Table.from_list(domain=table.domain,
112
- rows=[row for row in table if row["path"].value != filepath])
113
- return filtered_table
127
+ if file not in existing_paths:
128
+ # New file
129
+ new_files.append(str(file))
130
+ sizes[Path(ntpath.basename(file))] = size
131
+ else:
132
+ # File already in table: check if size changed
133
+ print("File: ", Path(ntpath.basename(file)))
134
+ old_size = sizes.get(Path(ntpath.basename(file)))
135
+ print(old_size)
136
+ if old_size is None or old_size != size:
137
+ new_files.append(str(file))
138
+ table = remove_from_table(file, table)
139
+ sizes[Path(ntpath.basename(file))] = size
114
140
 
115
141
 
116
- def check_for_embeddings(folder_path):
117
- """
118
- Check for an embeddings.pkl file in a given folder. Return its content if it exists.
142
+ # Save updated sizes.json (keys must be strings for JSON)
143
+ with open(filepath_sizes, "w") as json_file:
144
+ json.dump({str(k): v for k, v in sizes.items()}, json_file, indent=4)
119
145
 
120
- Parameters:
121
- folder_path (str): The path to the folder where embeddings.pkl may exist.
146
+ return new_files
122
147
 
123
- Returns:
124
- Table or None: The content of embeddings.pkl.
148
+
149
+ def remove_from_table(filepath, table):
125
150
  """
126
- filepaths = [os.path.join(folder_path, "embeddings_question.pkl"),
127
- os.path.join(folder_path, "embeddings.pkl")]
128
- for filepath in filepaths:
129
- if os.path.exists(filepath):
130
- data = Table.from_file(filepath)
131
- return data
132
- else:
133
- return None
151
+ Remove rows from the Orange table where 'path' matches the given filepath.
152
+ """
153
+ filepath = Path(filepath).resolve()
154
+
155
+ filtered_table = Table.from_list(
156
+ domain=table.domain,
157
+ rows=[
158
+ row for row in table
159
+ if Path(str(row["path"].value)).resolve() != filepath
160
+ ]
161
+ )
162
+ return filtered_table
134
163
 
135
164
 
136
165
  def extract_text(filepath):
@@ -1357,7 +1357,7 @@ def get_second_from_1970():
1357
1357
 
1358
1358
  def write_file_time(path):
1359
1359
  time= get_second_from_1970()
1360
- time_ok=path[:-3]+".ok"
1360
+ time_ok=path[:-4]+".ok"
1361
1361
  reset_files([time_ok])
1362
1362
  with open(path, "w") as f:
1363
1363
  f.write(str(time))
@@ -1369,7 +1369,7 @@ def write_file_time(path):
1369
1369
  return
1370
1370
 
1371
1371
  def read_file_time(path):
1372
- time_ok = path[:-3] + ".ok"
1372
+ time_ok = path[:-4] + ".ok"
1373
1373
  for _ in range(100):
1374
1374
  if not os.path.exists(time_ok):
1375
1375
  time.sleep(0.5)
@@ -1,11 +1,4 @@
1
- import subprocess
2
1
  import os
3
- import sys
4
-
5
- if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\","/"):
6
- from Orange.widgets.orangecontrib.AAIT.utils import SimpleDialogQt
7
- else:
8
- from orangecontrib.AAIT.utils import SimpleDialogQt
9
2
 
10
3
  def mac_shellcopy(src, dest):
11
4
  """
@@ -15,7 +8,6 @@ def mac_shellcopy(src, dest):
15
8
  :param dest: Chemin de destination (str)
16
9
  :returns: True si succès, False sinon
17
10
  """
18
- import io
19
11
  from tqdm import tqdm
20
12
 
21
13
  BUFFER_SIZE = 10 * 1024 * 1024 # 10MB buffer
@@ -0,0 +1,14 @@
1
+ import os
2
+
3
+ if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\","/"):
4
+ from Orange.widgets.orangecontrib.AAIT.utils import MetManagement
5
+ from Orange.widgets.orangecontrib.AAIT.utils.tools import (
6
+ change_owcorpus, concat_splitted_pypi)
7
+ else:
8
+ from orangecontrib.AAIT.utils import MetManagement
9
+ from orangecontrib.AAIT.utils.tools import (change_owcorpus,
10
+ concat_splitted_pypi)
11
+ #concat_splitted_pypi.unzip_dependancy_if_needed(concat_splitted_pypi.get_path_of_OrangeDir()+"/../aait_store",concat_splitted_pypi.get_path_of_OrangeDir()+"/../aait_store/Parameters/requirements.json",concat_splitted_pypi.get_site_package_path()+"aait_store_cut-part_001/input/aait_store.zip.001",16)
12
+ #concat_splitted_pypi.unzip_dependancy_if_needed(MetManagement.get_local_store_path()+"Models/NLP/all-mpnet-base-v2",MetManagement.get_local_store_path()+"Models/NLP/all-mpnet-base-v2/model.safetensors",concat_splitted_pypi.get_site_package_path()+"all-mpnet-base-v2-pypi-part_001/input/all-mpnet-base-v2.zip.001",5)
13
+ #concat_splitted_pypi.unzip_dependancy_if_needed(concat_splitted_pypi.get_path_of_OrangeDir()+"/Lib/site-packages/forall/gpt4all",concat_splitted_pypi.get_path_of_OrangeDir()+"/Lib/site-packages/forall/gpt4all/bin/chat.exe",concat_splitted_pypi.get_site_package_path()+"gpt4all-pypi-part_001/input/gpt4all.zip.001",0)
14
+ change_owcorpus.replace_owcorpus_file()
@@ -452,48 +452,3 @@ def select_new_file_ctypes(file_filter="All Files (*.*)"):
452
452
  return result.split('\0', 1)[0].replace("\\", "/")
453
453
 
454
454
 
455
-
456
- import subprocess
457
- import os
458
- import sys
459
-
460
- def mac_shellcopy(src, dest):
461
- """
462
- Copy files or directories on macOS using AppleScript to show a native progress dialog.
463
- :param src: Path to source file or directory (str or list of str)
464
- :param dest: Path to destination directory (str)
465
- :returns: True if successful, False otherwise
466
- """
467
- if isinstance(src, str):
468
- src = [src]
469
- src = [os.path.abspath(s) for s in src]
470
- dest = os.path.abspath(dest)
471
-
472
- # AppleScript for copying with progress dialog
473
- script = f'''
474
- set srcList to {{{", ".join([f'POSIX file "{s}"' for s in src])}}}
475
- set destFolder to POSIX file "{dest}"
476
- tell application "Finder"
477
- repeat with aSrc in srcList
478
- duplicate aSrc to destFolder with replacing
479
- end repeat
480
- end tell
481
- '''
482
-
483
- try:
484
- result = subprocess.run(
485
- ['osascript', '-e', script],
486
- capture_output=True,
487
- text=True
488
- )
489
- if result.returncode == 0:
490
- return True
491
- else:
492
- print("AppleScript error:", result.stderr)
493
- return False
494
- except Exception as e:
495
- print("Error:", e)
496
- return False
497
-
498
- # Usage:
499
- # mac_shellcopy("/path/to/source", "/path/to/destination")