aait 2.1.1__tar.gz → 2.1.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aait-2.1.2.1/License.txt +6 -0
- aait-2.1.2.1/PKG-INFO +28 -0
- aait-2.1.2.1/aait.egg-info/PKG-INFO +28 -0
- {aait-2.1.1 → aait-2.1.2.1}/aait.egg-info/SOURCES.txt +20 -6
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/chunking.py +38 -33
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/process_documents.py +100 -71
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/MetManagement.py +2 -2
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/mac_utils.py +0 -8
- aait-2.1.2.1/orangecontrib/AAIT/utils/tools/first_time_check.py +14 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/windows_utils.py +0 -45
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWAddColumns.py +23 -6
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWConverseLLM.py +1 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWExecuteScript.py +4 -0
- aait-2.1.2.1/orangecontrib/AAIT/widgets/OWFileMetadata.py +183 -0
- aait-2.1.2.1/orangecontrib/AAIT/widgets/OWFileSyncChecker.py +282 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWFindFilesFromDir.py +41 -6
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWGenerateQuestions.py +1 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWGenerateSynthesis.py +1 -1
- aait-2.1.2.1/orangecontrib/AAIT/widgets/OWGenerateWord.py +483 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWInputSelector.py +6 -9
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWLemmatizer.py +1 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen3B_Q4.py +4 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q6.py +4 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_EN.py +1 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_FR.py +1 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWProcessDocumentsFromPath.py +1 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWQueryLLM.py +1 -1
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWReranking.py +1 -1
- aait-2.1.2.1/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +133 -0
- aait-2.1.2.1/orangecontrib/AAIT/widgets/designer/owfilemetadata.ui +41 -0
- aait-2.1.2.1/orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui +43 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owfilewithpath.ui +16 -3
- aait-2.1.2.1/orangecontrib/AAIT/widgets/designer/owgenerate_word.ui +54 -0
- aait-2.1.2.1/orangecontrib/AAIT/widgets/icons/owfilemetadata.svg +4 -0
- aait-2.1.2.1/orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg +45 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/processdocuments.svg +12 -0
- {aait-2.1.1 → aait-2.1.2.1}/setup.py +1 -1
- aait-2.1.2.1/tests/test_class_values_context_handler.py +75 -0
- aait-2.1.2.1/tests/test_credentials.py +76 -0
- aait-2.1.2.1/tests/test_domain_context_handler.py +401 -0
- aait-2.1.2.1/tests/test_gui.py +140 -0
- aait-2.1.2.1/tests/test_matplotlib_export.py +43 -0
- aait-2.1.2.1/tests/test_perfect_domain_context_handler.py +148 -0
- aait-2.1.2.1/tests/test_scatterplot_density.py +59 -0
- aait-2.1.2.1/tests/test_settings_handler.py +27 -0
- aait-2.1.2.1/tests/test_widgets_outputs.py +29 -0
- aait-2.1.2.1/tests/test_workflows.py +80 -0
- aait-2.1.1/PKG-INFO +0 -8
- aait-2.1.1/aait.egg-info/PKG-INFO +0 -8
- aait-2.1.1/orangecontrib/AAIT/llm/GPT4ALL.py +0 -613
- aait-2.1.1/orangecontrib/AAIT/llm/GPT4ALL_killer.py +0 -68
- aait-2.1.1/orangecontrib/AAIT/utils/tools/first_time_check.py +0 -14
- aait-2.1.1/orangecontrib/AAIT/widgets/OWLLM4ALL.py +0 -304
- aait-2.1.1/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +0 -107
- aait-2.1.1/orangecontrib/AAIT/widgets/designer/owllm4all.ui +0 -70
- aait-2.1.1/orangecontrib/AAIT/widgets/icons/llm4all.svg +0 -72
- {aait-2.1.1 → aait-2.1.2.1}/aait.egg-info/dependency_links.txt +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/aait.egg-info/entry_points.txt +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/aait.egg-info/namespace_packages.txt +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/aait.egg-info/requires.txt +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/aait.egg-info/top_level.txt +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/SignalReceiver.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/audit_widget.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (2).TIF +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (3).TIF +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (4).TIF +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0001.csv +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0002.csv +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0003.csv +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/dynamic_results.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/test_all_widgets.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/test_server.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_4all.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_chunking.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_edit_table.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_mpnet_create_embeddings.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_optimisation.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_optimisationselection.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_powfactory.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_queryllm.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_spacy_md_fr_lemmatizer.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widget_traduction.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/audit_widget/widgets_model.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/encapsulation/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/fix_torch/fix_torch_dll_error.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/fix_torch/libomp140.x86_64.dll +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/MergeBaseLora.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/SERV_kill_workflow.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/SERV_requests.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/SERV_start_workflow.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/Tutorial_Finetuning.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/Tutorial_TestFinetuning.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/answers.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/embeddings.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/finetuning.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/functions_DatasetGeneration.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/functions_Finetuning.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/lemmes.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/lmstudio.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/main_DatasetGeneration.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/main_Finetuning.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/prompt_management.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/quickpy.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/test.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/test_functions.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/llm/translations.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/optimiser/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/optimiser/optuna_multi.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/CheckMetaData.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/SimpleDialogQt.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/aait_repo_file.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/check_data_in.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/delta_local_shared_fodler.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/import_uic.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/initialize_from_ini.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/shared_functions.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/shared_variables.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/subprocess_management.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/thread_management.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/tools/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/tools/change_owcorpus.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/utils/tools/owcorpus_ok.txt +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWAAITResourcesManager.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWApplyRules.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWCN2rule_view.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWChunking.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWConcatRules.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWCreateEmbeddings.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWDisplayMD.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWEditTable.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWEndLoop.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWExtraChunks.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWExtractTokens.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWFileWithPath.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWKeywords.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWLMStudio.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWLanguageDetection.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_CE_MiniLML6.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Falcon.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_HelsinkiEnFr.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_HelsinkiFrEn.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_MPNET.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Mistral.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen1B5_Q6.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen2_5_32B.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q4.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_Solar.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWModel_SolarUncensored.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWOptimisation.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWOptimisationSelection.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWOptimisationSendScore.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWRandomData.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWSaveFilepathEntry.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWStartLoop.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWTable2Corpus.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWTranslation.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWTrigger.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/OWUnzipFolder.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/POW_Wfactory.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owMarkdown.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/ow_widget_random_data.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owaddcolumns.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owapplyrules.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owchunking.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owconcatrules.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owconversellm.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owembeddings.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owendloop.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owexecutescript.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owextrachunks.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owextracttokens.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owkeyword.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owlangdetect.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owlemmatizer.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owloadworkflow.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_en_fr.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_fr_en.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_mistral.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_mpnet.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_qwen.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_solar.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_solar_uncensored.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_en.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_fr.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/ownumberpointinrules.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owprocessdocuments.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owqueryllm.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owreranking.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owsavewithpath.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owstartloop.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owtable2corpus.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owtranslation.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owtrigger.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/designer/owunzipfolder.ui +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/CN2RuleViewer.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/MDViewer.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/Mistral.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/apply_rules.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/category.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/dark_green.txt +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/de.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/documents.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/endloop.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/extra_chunks.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/in_or_out.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/input.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/keyword.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/languages.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/lm_studio.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_pull.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_push.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/local_interf_text_pull.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/logo_solar.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/logo_uncensoredsolar.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/logo_upload.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/models.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/optimisation.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/optimizer.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/output.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owCN2_intersect_rules.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owchunking.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owconversellm.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owedittable.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owembeddings.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owenvinfo.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owexecutescript.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owextracttokens.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owfilewithpath.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owlemmatizer.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_ce_minilml6.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_falcon.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_en_fr.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_fr_en.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_mpnet.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_en.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_fr.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owoptimisation.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owoptimisationselection.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owqueryllm.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owreranking.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owsavefilepathentry.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owtable2corpus.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owtranslation.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/owtrigger.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/qwen-color.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/select_dynamic_row.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/startloop.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/tools.png +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/widgetFactory.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/AAIT/widgets/icons/zip.svg +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/orangecontrib/__init__.py +0 -0
- {aait-2.1.1 → aait-2.1.2.1}/setup.cfg +0 -0
aait-2.1.2.1/License.txt
ADDED
aait-2.1.2.1/PKG-INFO
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aait
|
|
3
|
+
Version: 2.1.2.1
|
|
4
|
+
Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
|
|
5
|
+
Home-page:
|
|
6
|
+
Author: Orange community
|
|
7
|
+
Author-email:
|
|
8
|
+
Keywords: orange3 add-on
|
|
9
|
+
License-File: License.txt
|
|
10
|
+
Requires-Dist: torch
|
|
11
|
+
Requires-Dist: sentence-transformers==5.0.0
|
|
12
|
+
Requires-Dist: gpt4all[all]==2.8.2
|
|
13
|
+
Requires-Dist: sacremoses==0.1.1
|
|
14
|
+
Requires-Dist: transformers==4.51.3
|
|
15
|
+
Requires-Dist: sentencepiece==0.2.0
|
|
16
|
+
Requires-Dist: optuna
|
|
17
|
+
Requires-Dist: spacy==3.7.6
|
|
18
|
+
Requires-Dist: markdown
|
|
19
|
+
Requires-Dist: python-multipart
|
|
20
|
+
Requires-Dist: PyMuPDF==1.24.14
|
|
21
|
+
Requires-Dist: chonkie==0.4.1
|
|
22
|
+
Requires-Dist: GPUtil==1.4.0
|
|
23
|
+
Requires-Dist: unidecode==1.3.8
|
|
24
|
+
Requires-Dist: python-docx==1.1.2
|
|
25
|
+
Requires-Dist: psutil
|
|
26
|
+
Requires-Dist: thefuzz==0.22.1
|
|
27
|
+
Requires-Dist: beautifulsoup4==4.12.3
|
|
28
|
+
Requires-Dist: CATEGORIT
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aait
|
|
3
|
+
Version: 2.1.2.1
|
|
4
|
+
Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
|
|
5
|
+
Home-page:
|
|
6
|
+
Author: Orange community
|
|
7
|
+
Author-email:
|
|
8
|
+
Keywords: orange3 add-on
|
|
9
|
+
License-File: License.txt
|
|
10
|
+
Requires-Dist: torch
|
|
11
|
+
Requires-Dist: sentence-transformers==5.0.0
|
|
12
|
+
Requires-Dist: gpt4all[all]==2.8.2
|
|
13
|
+
Requires-Dist: sacremoses==0.1.1
|
|
14
|
+
Requires-Dist: transformers==4.51.3
|
|
15
|
+
Requires-Dist: sentencepiece==0.2.0
|
|
16
|
+
Requires-Dist: optuna
|
|
17
|
+
Requires-Dist: spacy==3.7.6
|
|
18
|
+
Requires-Dist: markdown
|
|
19
|
+
Requires-Dist: python-multipart
|
|
20
|
+
Requires-Dist: PyMuPDF==1.24.14
|
|
21
|
+
Requires-Dist: chonkie==0.4.1
|
|
22
|
+
Requires-Dist: GPUtil==1.4.0
|
|
23
|
+
Requires-Dist: unidecode==1.3.8
|
|
24
|
+
Requires-Dist: python-docx==1.1.2
|
|
25
|
+
Requires-Dist: psutil
|
|
26
|
+
Requires-Dist: thefuzz==0.22.1
|
|
27
|
+
Requires-Dist: beautifulsoup4==4.12.3
|
|
28
|
+
Requires-Dist: CATEGORIT
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
License.txt
|
|
1
2
|
setup.py
|
|
2
3
|
aait.egg-info/PKG-INFO
|
|
3
4
|
aait.egg-info/SOURCES.txt
|
|
@@ -34,8 +35,6 @@ orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0003.csv
|
|
|
34
35
|
orangecontrib/AAIT/encapsulation/__init__.py
|
|
35
36
|
orangecontrib/AAIT/fix_torch/fix_torch_dll_error.py
|
|
36
37
|
orangecontrib/AAIT/fix_torch/libomp140.x86_64.dll
|
|
37
|
-
orangecontrib/AAIT/llm/GPT4ALL.py
|
|
38
|
-
orangecontrib/AAIT/llm/GPT4ALL_killer.py
|
|
39
38
|
orangecontrib/AAIT/llm/MergeBaseLora.py
|
|
40
39
|
orangecontrib/AAIT/llm/SERV_kill_workflow.py
|
|
41
40
|
orangecontrib/AAIT/llm/SERV_requests.py
|
|
@@ -95,13 +94,15 @@ orangecontrib/AAIT/widgets/OWEndLoop.py
|
|
|
95
94
|
orangecontrib/AAIT/widgets/OWExecuteScript.py
|
|
96
95
|
orangecontrib/AAIT/widgets/OWExtraChunks.py
|
|
97
96
|
orangecontrib/AAIT/widgets/OWExtractTokens.py
|
|
97
|
+
orangecontrib/AAIT/widgets/OWFileMetadata.py
|
|
98
|
+
orangecontrib/AAIT/widgets/OWFileSyncChecker.py
|
|
98
99
|
orangecontrib/AAIT/widgets/OWFileWithPath.py
|
|
99
100
|
orangecontrib/AAIT/widgets/OWFindFilesFromDir.py
|
|
100
101
|
orangecontrib/AAIT/widgets/OWGenerateQuestions.py
|
|
101
102
|
orangecontrib/AAIT/widgets/OWGenerateSynthesis.py
|
|
103
|
+
orangecontrib/AAIT/widgets/OWGenerateWord.py
|
|
102
104
|
orangecontrib/AAIT/widgets/OWInputSelector.py
|
|
103
105
|
orangecontrib/AAIT/widgets/OWKeywords.py
|
|
104
|
-
orangecontrib/AAIT/widgets/OWLLM4ALL.py
|
|
105
106
|
orangecontrib/AAIT/widgets/OWLMStudio.py
|
|
106
107
|
orangecontrib/AAIT/widgets/OWLanguageDetection.py
|
|
107
108
|
orangecontrib/AAIT/widgets/OWLemmatizer.py
|
|
@@ -150,14 +151,16 @@ orangecontrib/AAIT/widgets/designer/owendloop.ui
|
|
|
150
151
|
orangecontrib/AAIT/widgets/designer/owexecutescript.ui
|
|
151
152
|
orangecontrib/AAIT/widgets/designer/owextrachunks.ui
|
|
152
153
|
orangecontrib/AAIT/widgets/designer/owextracttokens.ui
|
|
154
|
+
orangecontrib/AAIT/widgets/designer/owfilemetadata.ui
|
|
155
|
+
orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui
|
|
153
156
|
orangecontrib/AAIT/widgets/designer/owfilewithpath.ui
|
|
154
157
|
orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui
|
|
158
|
+
orangecontrib/AAIT/widgets/designer/owgenerate_word.ui
|
|
155
159
|
orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui
|
|
156
160
|
orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui
|
|
157
161
|
orangecontrib/AAIT/widgets/designer/owkeyword.ui
|
|
158
162
|
orangecontrib/AAIT/widgets/designer/owlangdetect.ui
|
|
159
163
|
orangecontrib/AAIT/widgets/designer/owlemmatizer.ui
|
|
160
|
-
orangecontrib/AAIT/widgets/designer/owllm4all.ui
|
|
161
164
|
orangecontrib/AAIT/widgets/designer/owloadworkflow.ui
|
|
162
165
|
orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui
|
|
163
166
|
orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui
|
|
@@ -195,7 +198,6 @@ orangecontrib/AAIT/widgets/icons/in_or_out.png
|
|
|
195
198
|
orangecontrib/AAIT/widgets/icons/input.png
|
|
196
199
|
orangecontrib/AAIT/widgets/icons/keyword.png
|
|
197
200
|
orangecontrib/AAIT/widgets/icons/languages.png
|
|
198
|
-
orangecontrib/AAIT/widgets/icons/llm4all.svg
|
|
199
201
|
orangecontrib/AAIT/widgets/icons/lm_studio.png
|
|
200
202
|
orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg
|
|
201
203
|
orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg
|
|
@@ -217,7 +219,9 @@ orangecontrib/AAIT/widgets/icons/owembeddings.svg
|
|
|
217
219
|
orangecontrib/AAIT/widgets/icons/owenvinfo.png
|
|
218
220
|
orangecontrib/AAIT/widgets/icons/owexecutescript.svg
|
|
219
221
|
orangecontrib/AAIT/widgets/icons/owextracttokens.svg
|
|
222
|
+
orangecontrib/AAIT/widgets/icons/owfilemetadata.svg
|
|
220
223
|
orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg
|
|
224
|
+
orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg
|
|
221
225
|
orangecontrib/AAIT/widgets/icons/owfilewithpath.svg
|
|
222
226
|
orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg
|
|
223
227
|
orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png
|
|
@@ -243,4 +247,14 @@ orangecontrib/AAIT/widgets/icons/select_dynamic_row.png
|
|
|
243
247
|
orangecontrib/AAIT/widgets/icons/startloop.png
|
|
244
248
|
orangecontrib/AAIT/widgets/icons/tools.png
|
|
245
249
|
orangecontrib/AAIT/widgets/icons/widgetFactory.svg
|
|
246
|
-
orangecontrib/AAIT/widgets/icons/zip.svg
|
|
250
|
+
orangecontrib/AAIT/widgets/icons/zip.svg
|
|
251
|
+
tests/test_class_values_context_handler.py
|
|
252
|
+
tests/test_credentials.py
|
|
253
|
+
tests/test_domain_context_handler.py
|
|
254
|
+
tests/test_gui.py
|
|
255
|
+
tests/test_matplotlib_export.py
|
|
256
|
+
tests/test_perfect_domain_context_handler.py
|
|
257
|
+
tests/test_scatterplot_density.py
|
|
258
|
+
tests/test_settings_handler.py
|
|
259
|
+
tests/test_widgets_outputs.py
|
|
260
|
+
tests/test_workflows.py
|
|
@@ -74,29 +74,43 @@ def chunk_sentences(content, tokenizer, chunk_size=500, chunk_overlap=125):
|
|
|
74
74
|
|
|
75
75
|
def chunk_markdown(content, tokenizer=None, chunk_size=500, chunk_overlap=125):
|
|
76
76
|
"""
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
77
|
+
Découpe un contenu Markdown en chunks :
|
|
78
|
+
- Si des en-têtes Markdown (#, ##, ###...) existent : on respecte la hiérarchie
|
|
79
|
+
et on inclut dans les métadonnées uniquement les titres de la branche courante.
|
|
80
|
+
- Sinon : on délègue à chunk_words().
|
|
81
|
+
|
|
82
|
+
Parameters
|
|
83
|
+
----------
|
|
84
|
+
content : str
|
|
85
|
+
Le contenu (Markdown ou texte brut).
|
|
86
|
+
tokenizer : any
|
|
87
|
+
Tokenizer utilisé par WordChunker si besoin.
|
|
88
|
+
chunk_size : int
|
|
89
|
+
Nombre max de mots par chunk.
|
|
90
|
+
chunk_overlap : int
|
|
91
|
+
Overlap (en mots) entre deux chunks consécutifs.
|
|
92
|
+
|
|
93
|
+
Returns
|
|
94
|
+
-------
|
|
95
|
+
(chunks, metadatas) : tuple(list[str], list[str])
|
|
96
|
+
chunks : segments de texte
|
|
97
|
+
metadatas : hiérarchies de titres associées (chaînes " ; " séparées), vide si aucun titre.
|
|
89
98
|
"""
|
|
99
|
+
if not content or not isinstance(content, str):
|
|
100
|
+
return [], []
|
|
101
|
+
|
|
90
102
|
header_regex = re.compile(r"^(#{1,6})\s+(.*)", re.MULTILINE)
|
|
91
103
|
matches = list(header_regex.finditer(content))
|
|
92
104
|
|
|
105
|
+
# Cas SANS en-têtes : appel direct à chunk_words
|
|
93
106
|
if not matches:
|
|
94
|
-
|
|
107
|
+
chunks, _ = chunk_words(content, tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
|
108
|
+
return chunks, [""] * len(chunks)
|
|
95
109
|
|
|
96
|
-
#
|
|
110
|
+
# Cas AVEC en-têtes : extraire les sections (level, title, body)
|
|
97
111
|
sections = []
|
|
98
112
|
for i, match in enumerate(matches):
|
|
99
|
-
level = len(match.group(1))
|
|
113
|
+
level = len(match.group(1))
|
|
100
114
|
title = match.group(2).strip()
|
|
101
115
|
start = match.end()
|
|
102
116
|
end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
|
|
@@ -104,32 +118,23 @@ def chunk_markdown(content, tokenizer=None, chunk_size=500, chunk_overlap=125):
|
|
|
104
118
|
sections.append((level, title, body))
|
|
105
119
|
|
|
106
120
|
chunks, metadatas = [], []
|
|
107
|
-
current_titles = {}
|
|
121
|
+
current_titles = {}
|
|
108
122
|
|
|
109
123
|
for level, title, body in sections:
|
|
110
|
-
#
|
|
111
|
-
current_titles[level] = title
|
|
112
|
-
# Remove headers at deeper levels or same level from previous sections
|
|
124
|
+
# purge les niveaux >= level
|
|
113
125
|
for l in list(current_titles.keys()):
|
|
114
|
-
if l >= level
|
|
126
|
+
if l >= level:
|
|
115
127
|
current_titles.pop(l, None)
|
|
128
|
+
current_titles[level] = title
|
|
116
129
|
|
|
117
|
-
|
|
118
|
-
metadata = " ; ".join(current_titles[l] for l in sorted(current_titles) if l <= level)
|
|
119
|
-
#print(f"Section: {title} (Level {level}), Metadata: {metadata}") # Debug
|
|
130
|
+
metadata = " ; ".join(current_titles[lvl] for lvl in sorted(current_titles) if lvl <= level)
|
|
120
131
|
|
|
121
|
-
#
|
|
122
|
-
|
|
132
|
+
# déléguer le découpage de body à chunk_words
|
|
133
|
+
body_chunks, _ = chunk_words(body, tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
|
|
123
134
|
|
|
124
|
-
|
|
125
|
-
chunks.append(
|
|
135
|
+
for ch in body_chunks:
|
|
136
|
+
chunks.append(ch)
|
|
126
137
|
metadatas.append(metadata)
|
|
127
|
-
else:
|
|
128
|
-
for i in range(0, len(words), chunk_size - chunk_overlap):
|
|
129
|
-
chunk = " ".join(words[i:i + chunk_size])
|
|
130
|
-
chunks.append(chunk)
|
|
131
|
-
metadatas.append(metadata)
|
|
132
|
-
#print(f"Chunk: {chunk[:50]}..., Metadata: {metadata}") # Debug
|
|
133
138
|
|
|
134
139
|
return chunks, metadatas
|
|
135
140
|
|
|
@@ -1,38 +1,45 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import json
|
|
3
3
|
import ntpath
|
|
4
|
+
from pathlib import Path
|
|
4
5
|
|
|
5
|
-
from Orange.data import Table, Domain, StringVariable
|
|
6
|
+
from Orange.data import Table, Domain, StringVariable, ContinuousVariable
|
|
6
7
|
|
|
7
8
|
import fitz
|
|
8
9
|
import docx
|
|
9
10
|
|
|
10
11
|
|
|
11
|
-
|
|
12
12
|
def process_documents(dirpath):
|
|
13
|
-
if dirpath is None or not
|
|
13
|
+
if dirpath is None or not Path(dirpath).exists():
|
|
14
14
|
return None, None
|
|
15
15
|
|
|
16
|
+
# Normalize dirpath
|
|
17
|
+
dirpath = Path(dirpath).resolve()
|
|
18
|
+
|
|
16
19
|
# get path from user selection
|
|
17
20
|
embeddings = check_for_embeddings(dirpath)
|
|
18
|
-
dirpath = dirpath.replace("\\","/")
|
|
19
21
|
|
|
20
22
|
# Set selected path in the saved embeddings
|
|
21
23
|
if embeddings is not None:
|
|
22
|
-
common_path = find_common_root(embeddings).
|
|
24
|
+
common_path = Path(find_common_root(embeddings)).resolve()
|
|
23
25
|
for row in embeddings:
|
|
24
|
-
|
|
26
|
+
row_path = Path(str(row["path"].value)).resolve()
|
|
27
|
+
# Replace common root with the current dirpath
|
|
28
|
+
try:
|
|
29
|
+
row["path"] = str(dirpath / row_path.relative_to(common_path))
|
|
30
|
+
except ValueError:
|
|
31
|
+
# If relative_to fails (paths not matching), just normalize
|
|
32
|
+
row["path"] = str(row_path)
|
|
25
33
|
|
|
26
34
|
# Verify which files are already processed
|
|
27
35
|
files_to_process = get_files_to_process(dirpath, embeddings)
|
|
28
36
|
|
|
29
37
|
rows = []
|
|
30
38
|
for file in files_to_process:
|
|
31
|
-
|
|
32
|
-
content = extract_text(file)
|
|
33
|
-
filename =
|
|
34
|
-
|
|
35
|
-
row = [file, filename, content]
|
|
39
|
+
file = Path(file).resolve()
|
|
40
|
+
content = extract_text(str(file)) # extractor may expect string
|
|
41
|
+
filename = file.name
|
|
42
|
+
row = [str(file), filename, content] # store strings in Orange table
|
|
36
43
|
rows.append(row)
|
|
37
44
|
|
|
38
45
|
# Build a table with the constructed rows
|
|
@@ -44,93 +51,115 @@ def process_documents(dirpath):
|
|
|
44
51
|
return out_data, embeddings
|
|
45
52
|
|
|
46
53
|
|
|
54
|
+
def check_for_embeddings(folder_path):
|
|
55
|
+
"""
|
|
56
|
+
Check for an embeddings.pkl file in a given folder. Return its content if it exists.
|
|
57
|
+
|
|
58
|
+
Parameters:
|
|
59
|
+
folder_path (str | Path): The path to the folder where embeddings.pkl may exist.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Table or None: The content of embeddings.pkl, or None if not found.
|
|
63
|
+
"""
|
|
64
|
+
folder_path = Path(folder_path).resolve()
|
|
65
|
+
|
|
66
|
+
filepaths = [
|
|
67
|
+
folder_path / "embeddings_question.pkl",
|
|
68
|
+
folder_path / "embeddings.pkl"
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
for filepath in filepaths:
|
|
72
|
+
if filepath.exists():
|
|
73
|
+
return Table.from_file(str(filepath)) # Table.from_file expects a str
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
|
|
47
77
|
def find_common_root(data_table, column_name="path"):
|
|
48
78
|
"""Finds the common root path from a column of file paths in an Orange Data Table."""
|
|
49
|
-
paths = [str(row[column_name])
|
|
79
|
+
paths = [Path(str(row[column_name].value)).resolve()
|
|
80
|
+
for row in data_table if row[column_name] is not None]
|
|
50
81
|
if not paths:
|
|
51
82
|
return ""
|
|
52
|
-
return os.path.commonpath(paths)
|
|
83
|
+
return str(Path(os.path.commonpath(paths)).resolve())
|
|
53
84
|
|
|
54
85
|
|
|
55
86
|
def get_files_to_process(folder_path, table=None):
|
|
56
87
|
"""
|
|
57
|
-
Finds all PDF files in a folder (including subfolders) that are not already in the table
|
|
58
|
-
|
|
88
|
+
Finds all PDF/DOCX files in a folder (including subfolders) that are not already in the table
|
|
89
|
+
or that have changed since last check (based on file size).
|
|
59
90
|
|
|
60
91
|
:param folder_path: Path to the folder to scan for documents.
|
|
61
|
-
:param table: Orange Data Table with
|
|
62
|
-
:return: List of
|
|
92
|
+
:param table: Orange Data Table with column "path".
|
|
93
|
+
:return: List of new/changed file paths.
|
|
63
94
|
"""
|
|
64
|
-
|
|
65
|
-
# Supported file extensions
|
|
66
|
-
supported_extensions = [".pdf", ".docx"]
|
|
95
|
+
supported_extensions = {".pdf", ".docx"}
|
|
67
96
|
|
|
68
|
-
|
|
69
|
-
filepath_sizes =
|
|
70
|
-
|
|
97
|
+
folder_path = Path(folder_path).resolve()
|
|
98
|
+
filepath_sizes = folder_path / "sizes.json"
|
|
99
|
+
|
|
100
|
+
print(filepath_sizes)
|
|
101
|
+
print(filepath_sizes.exists())
|
|
102
|
+
|
|
103
|
+
# Load previous file sizes
|
|
104
|
+
if filepath_sizes.exists():
|
|
71
105
|
with open(filepath_sizes, "r") as json_file:
|
|
72
|
-
sizes = json.load(json_file)
|
|
106
|
+
sizes = {Path(k): v for k, v in json.load(json_file).items()}
|
|
73
107
|
else:
|
|
74
|
-
sizes =
|
|
108
|
+
sizes = {}
|
|
75
109
|
|
|
76
|
-
|
|
110
|
+
print(sizes)
|
|
111
|
+
|
|
112
|
+
# Extract the existing paths from the Orange Data Table
|
|
77
113
|
if table:
|
|
78
|
-
|
|
114
|
+
# Orange stores metas as strings → turn them into Paths
|
|
115
|
+
existing_paths = {Path(str(p)).resolve() for p in table[:, "path"].metas.flatten()}
|
|
79
116
|
else:
|
|
80
117
|
existing_paths = set()
|
|
81
118
|
|
|
82
|
-
|
|
83
|
-
# Walk through the folder and its subfolders
|
|
84
119
|
new_files = []
|
|
85
|
-
for root, _, files in os.walk(folder_path):
|
|
86
|
-
for file in files:
|
|
87
|
-
# Check if the file has a supported extension
|
|
88
|
-
if os.path.splitext(file)[1].lower() in supported_extensions:
|
|
89
|
-
# Add the file if it is not already in the table
|
|
90
|
-
filepath = os.path.join(root, file).replace("\\","/")
|
|
91
|
-
if filepath not in existing_paths:
|
|
92
|
-
new_files.append(filepath)
|
|
93
|
-
sizes[filepath] = os.path.getsize(filepath)
|
|
94
|
-
# If the file is in the table, verify if the file has been modified (comparing the size)
|
|
95
|
-
else:
|
|
96
|
-
new_size = os.path.getsize(filepath)
|
|
97
|
-
if filepath not in sizes.keys():
|
|
98
|
-
sizes[filepath] = new_size
|
|
99
|
-
else:
|
|
100
|
-
old_size = sizes[filepath]
|
|
101
|
-
if old_size != new_size:
|
|
102
|
-
new_files.append(filepath)
|
|
103
|
-
table = remove_from_table(filepath, table)
|
|
104
|
-
sizes[filepath] = new_size
|
|
105
|
-
with open(filepath_sizes, "w") as json_file:
|
|
106
|
-
json.dump(sizes, json_file, indent=4)
|
|
107
|
-
return new_files
|
|
108
120
|
|
|
121
|
+
# Walk through the folder and subfolders
|
|
122
|
+
for file in folder_path.rglob("*"):
|
|
123
|
+
if file.suffix.lower() in supported_extensions:
|
|
124
|
+
file = file.resolve()
|
|
125
|
+
size = file.stat().st_size
|
|
109
126
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
127
|
+
if file not in existing_paths:
|
|
128
|
+
# New file
|
|
129
|
+
new_files.append(str(file))
|
|
130
|
+
sizes[Path(ntpath.basename(file))] = size
|
|
131
|
+
else:
|
|
132
|
+
# File already in table: check if size changed
|
|
133
|
+
print("File: ", Path(ntpath.basename(file)))
|
|
134
|
+
old_size = sizes.get(Path(ntpath.basename(file)))
|
|
135
|
+
print(old_size)
|
|
136
|
+
if old_size is None or old_size != size:
|
|
137
|
+
new_files.append(str(file))
|
|
138
|
+
table = remove_from_table(file, table)
|
|
139
|
+
sizes[Path(ntpath.basename(file))] = size
|
|
114
140
|
|
|
115
141
|
|
|
116
|
-
|
|
117
|
-
""
|
|
118
|
-
|
|
142
|
+
# Save updated sizes.json (keys must be strings for JSON)
|
|
143
|
+
with open(filepath_sizes, "w") as json_file:
|
|
144
|
+
json.dump({str(k): v for k, v in sizes.items()}, json_file, indent=4)
|
|
119
145
|
|
|
120
|
-
|
|
121
|
-
folder_path (str): The path to the folder where embeddings.pkl may exist.
|
|
146
|
+
return new_files
|
|
122
147
|
|
|
123
|
-
|
|
124
|
-
|
|
148
|
+
|
|
149
|
+
def remove_from_table(filepath, table):
|
|
125
150
|
"""
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
151
|
+
Remove rows from the Orange table where 'path' matches the given filepath.
|
|
152
|
+
"""
|
|
153
|
+
filepath = Path(filepath).resolve()
|
|
154
|
+
|
|
155
|
+
filtered_table = Table.from_list(
|
|
156
|
+
domain=table.domain,
|
|
157
|
+
rows=[
|
|
158
|
+
row for row in table
|
|
159
|
+
if Path(str(row["path"].value)).resolve() != filepath
|
|
160
|
+
]
|
|
161
|
+
)
|
|
162
|
+
return filtered_table
|
|
134
163
|
|
|
135
164
|
|
|
136
165
|
def extract_text(filepath):
|
|
@@ -1357,7 +1357,7 @@ def get_second_from_1970():
|
|
|
1357
1357
|
|
|
1358
1358
|
def write_file_time(path):
|
|
1359
1359
|
time= get_second_from_1970()
|
|
1360
|
-
time_ok=path[:-
|
|
1360
|
+
time_ok=path[:-4]+".ok"
|
|
1361
1361
|
reset_files([time_ok])
|
|
1362
1362
|
with open(path, "w") as f:
|
|
1363
1363
|
f.write(str(time))
|
|
@@ -1369,7 +1369,7 @@ def write_file_time(path):
|
|
|
1369
1369
|
return
|
|
1370
1370
|
|
|
1371
1371
|
def read_file_time(path):
|
|
1372
|
-
time_ok = path[:-
|
|
1372
|
+
time_ok = path[:-4] + ".ok"
|
|
1373
1373
|
for _ in range(100):
|
|
1374
1374
|
if not os.path.exists(time_ok):
|
|
1375
1375
|
time.sleep(0.5)
|
|
@@ -1,11 +1,4 @@
|
|
|
1
|
-
import subprocess
|
|
2
1
|
import os
|
|
3
|
-
import sys
|
|
4
|
-
|
|
5
|
-
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\","/"):
|
|
6
|
-
from Orange.widgets.orangecontrib.AAIT.utils import SimpleDialogQt
|
|
7
|
-
else:
|
|
8
|
-
from orangecontrib.AAIT.utils import SimpleDialogQt
|
|
9
2
|
|
|
10
3
|
def mac_shellcopy(src, dest):
|
|
11
4
|
"""
|
|
@@ -15,7 +8,6 @@ def mac_shellcopy(src, dest):
|
|
|
15
8
|
:param dest: Chemin de destination (str)
|
|
16
9
|
:returns: True si succès, False sinon
|
|
17
10
|
"""
|
|
18
|
-
import io
|
|
19
11
|
from tqdm import tqdm
|
|
20
12
|
|
|
21
13
|
BUFFER_SIZE = 10 * 1024 * 1024 # 10MB buffer
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\","/"):
|
|
4
|
+
from Orange.widgets.orangecontrib.AAIT.utils import MetManagement
|
|
5
|
+
from Orange.widgets.orangecontrib.AAIT.utils.tools import (
|
|
6
|
+
change_owcorpus, concat_splitted_pypi)
|
|
7
|
+
else:
|
|
8
|
+
from orangecontrib.AAIT.utils import MetManagement
|
|
9
|
+
from orangecontrib.AAIT.utils.tools import (change_owcorpus,
|
|
10
|
+
concat_splitted_pypi)
|
|
11
|
+
#concat_splitted_pypi.unzip_dependancy_if_needed(concat_splitted_pypi.get_path_of_OrangeDir()+"/../aait_store",concat_splitted_pypi.get_path_of_OrangeDir()+"/../aait_store/Parameters/requirements.json",concat_splitted_pypi.get_site_package_path()+"aait_store_cut-part_001/input/aait_store.zip.001",16)
|
|
12
|
+
#concat_splitted_pypi.unzip_dependancy_if_needed(MetManagement.get_local_store_path()+"Models/NLP/all-mpnet-base-v2",MetManagement.get_local_store_path()+"Models/NLP/all-mpnet-base-v2/model.safetensors",concat_splitted_pypi.get_site_package_path()+"all-mpnet-base-v2-pypi-part_001/input/all-mpnet-base-v2.zip.001",5)
|
|
13
|
+
#concat_splitted_pypi.unzip_dependancy_if_needed(concat_splitted_pypi.get_path_of_OrangeDir()+"/Lib/site-packages/forall/gpt4all",concat_splitted_pypi.get_path_of_OrangeDir()+"/Lib/site-packages/forall/gpt4all/bin/chat.exe",concat_splitted_pypi.get_site_package_path()+"gpt4all-pypi-part_001/input/gpt4all.zip.001",0)
|
|
14
|
+
change_owcorpus.replace_owcorpus_file()
|
|
@@ -452,48 +452,3 @@ def select_new_file_ctypes(file_filter="All Files (*.*)"):
|
|
|
452
452
|
return result.split('\0', 1)[0].replace("\\", "/")
|
|
453
453
|
|
|
454
454
|
|
|
455
|
-
|
|
456
|
-
import subprocess
|
|
457
|
-
import os
|
|
458
|
-
import sys
|
|
459
|
-
|
|
460
|
-
def mac_shellcopy(src, dest):
|
|
461
|
-
"""
|
|
462
|
-
Copy files or directories on macOS using AppleScript to show a native progress dialog.
|
|
463
|
-
:param src: Path to source file or directory (str or list of str)
|
|
464
|
-
:param dest: Path to destination directory (str)
|
|
465
|
-
:returns: True if successful, False otherwise
|
|
466
|
-
"""
|
|
467
|
-
if isinstance(src, str):
|
|
468
|
-
src = [src]
|
|
469
|
-
src = [os.path.abspath(s) for s in src]
|
|
470
|
-
dest = os.path.abspath(dest)
|
|
471
|
-
|
|
472
|
-
# AppleScript for copying with progress dialog
|
|
473
|
-
script = f'''
|
|
474
|
-
set srcList to {{{", ".join([f'POSIX file "{s}"' for s in src])}}}
|
|
475
|
-
set destFolder to POSIX file "{dest}"
|
|
476
|
-
tell application "Finder"
|
|
477
|
-
repeat with aSrc in srcList
|
|
478
|
-
duplicate aSrc to destFolder with replacing
|
|
479
|
-
end repeat
|
|
480
|
-
end tell
|
|
481
|
-
'''
|
|
482
|
-
|
|
483
|
-
try:
|
|
484
|
-
result = subprocess.run(
|
|
485
|
-
['osascript', '-e', script],
|
|
486
|
-
capture_output=True,
|
|
487
|
-
text=True
|
|
488
|
-
)
|
|
489
|
-
if result.returncode == 0:
|
|
490
|
-
return True
|
|
491
|
-
else:
|
|
492
|
-
print("AppleScript error:", result.stderr)
|
|
493
|
-
return False
|
|
494
|
-
except Exception as e:
|
|
495
|
-
print("Error:", e)
|
|
496
|
-
return False
|
|
497
|
-
|
|
498
|
-
# Usage:
|
|
499
|
-
# mac_shellcopy("/path/to/source", "/path/to/destination")
|