aait 2.2.2.3__tar.gz → 2.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aait-2.2.3/PKG-INFO +9 -0
- aait-2.2.3/aait.egg-info/PKG-INFO +9 -0
- {aait-2.2.2.3 → aait-2.2.3}/aait.egg-info/SOURCES.txt +1 -6
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/answers.py +1 -1
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/process_documents.py +101 -125
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/MetManagement.py +0 -13
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/windows_utils.py +114 -158
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWAccumulator.py +8 -23
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWEditTable.py +2 -2
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWEmptySwitch.py +1 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWFileMetadata.py +33 -32
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWFileSyncChecker.py +9 -9
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWFileWithPath.py +6 -5
- aait-2.2.3/orangecontrib/AAIT/widgets/OWFindFilesFromDir.py +161 -0
- aait-2.2.3/orangecontrib/AAIT/widgets/OWFusionNM.py +450 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWGenerateQuestions.py +1 -1
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWKeywordsDetection.py +1 -5
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWQueryLLM.py +3 -3
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owaccumulator.ui +0 -3
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owextrachunks.ui +1 -14
- aait-2.2.3/orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui +100 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owkeyword.ui +1 -1
- {aait-2.2.2.3 → aait-2.2.3}/setup.py +1 -1
- aait-2.2.2.3/PKG-INFO +0 -33
- aait-2.2.2.3/aait.egg-info/PKG-INFO +0 -33
- aait-2.2.2.3/orangecontrib/AAIT/widgets/OWFindFilesFromDir.py +0 -335
- aait-2.2.2.3/orangecontrib/AAIT/widgets/OWFusionNM.py +0 -410
- aait-2.2.2.3/orangecontrib/AAIT/widgets/OWKeywords.py +0 -153
- aait-2.2.2.3/orangecontrib/AAIT/widgets/OWLoadDocuments.py +0 -125
- aait-2.2.2.3/orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui +0 -171
- aait-2.2.2.3/orangecontrib/AAIT/widgets/designer/owloaddocuments.ui +0 -86
- aait-2.2.2.3/orangecontrib/AAIT/widgets/icons/owkeywords.png +0 -0
- aait-2.2.2.3/orangecontrib/AAIT/widgets/icons/owloaddocuments.svg +0 -52
- {aait-2.2.2.3 → aait-2.2.3}/License.txt +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/aait.egg-info/dependency_links.txt +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/aait.egg-info/entry_points.txt +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/aait.egg-info/namespace_packages.txt +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/aait.egg-info/requires.txt +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/aait.egg-info/top_level.txt +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/SignalReceiver.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/audit_widget.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (2).TIF +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (3).TIF +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/dataTests/00079473A - Copie (4).TIF +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0001.csv +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0002.csv +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/dataTests/Tir 81mm_0003.csv +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/dynamic_results.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/test_all_widgets.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/test_server.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_4all.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_chunking.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_edit_table.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_mpnet_create_embeddings.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_optimisation.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_optimisationselection.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_powfactory.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_queryllm.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_spacy_md_fr_lemmatizer.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widget_traduction.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/audit_widget/widgets_model.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/encapsulation/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/fix_torch/fix_torch_dll_error.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/fix_torch/libomp140.x86_64.dll +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/MergeBaseLora.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/SERV_kill_workflow.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/SERV_requests.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/SERV_start_workflow.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/Tutorial_Finetuning.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/Tutorial_TestFinetuning.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/chunking.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/embeddings.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/finetuning.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/functions_DatasetGeneration.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/functions_Finetuning.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/lemmes.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/lmstudio.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/main_DatasetGeneration.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/main_Finetuning.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/prompt_management.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/quickpy.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/test.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/test_functions.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/llm/translations.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/optimiser/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/optimiser/optuna_multi.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/CheckMetaData.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/SimpleDialogQt.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/aait_repo_file.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/check_data_in.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/delta_local_shared_fodler.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/import_uic.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/initialize_from_ini.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/mac_utils.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/shared_functions.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/shared_variables.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/subprocess_management.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/thread_management.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/tools/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/tools/change_owcorpus.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/tools/first_time_check.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/utils/tools/owcorpus_ok.txt +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWAAITResourcesManager.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWAddColumns.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWApplyRules.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWCN2rule_view.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWChunking.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWConcatRules.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWConverseLLM.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWCreateEmbeddings.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWDisplayMD.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWEndLoop.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWExecuteScript.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWExtraChunks.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWExtractTokens.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWGenerateSynthesis.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWGenerateWord.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWInputSelector.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWLMStudio.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWLanguageDetection.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWLemmatizer.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_CE_MiniLML6.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Falcon.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_HelsinkiEnFr.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_HelsinkiFrEn.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_MPNET.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Mistral.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Qwen.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Qwen1B5_Q6.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Qwen2_5_32B.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Qwen3B_Q4.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q4.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q6.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_Solar.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_SolarUncensored.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_EN.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_FR.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWOptimisation.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWOptimisationSelection.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWOptimisationSendScore.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWProcessDocumentsFromPath.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWRandomData.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWReranking.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWSaveFilepathEntry.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWSelectColumnDynamique.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWStartLoop.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWTable2Corpus.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWTranslation.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWTrigger.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/OWUnzipFolder.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/POW_Wfactory.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owMarkdown.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/ow_widget_random_data.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owaddcolumns.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owapplyrules.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owchunking.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owconcatrules.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owconversellm.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owembeddings.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owemptyswitch.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owendloop.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owexecutescript.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owextracttokens.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owfilemetadata.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owfilewithpath.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owfusion_nm.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owgenerate_word.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owkeywordsdetection.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owlangdetect.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owlemmatizer.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owloadworkflow.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_en_fr.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_fr_en.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_mistral.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_mpnet.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_qwen.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_2.5_32b.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_1.5b_q6.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_3b_q4.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q4.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q6.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_qwencoder_7b.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_solar.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_solar_uncensored.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_en.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_fr.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/ownumberpointinrules.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owprocessdocuments.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owqueryllm.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owreranking.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owsavewithpath.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owselect_column_dynamic.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owstartloop.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owtable2corpus.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owtranslation.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owtrigger.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/designer/owunzipfolder.ui +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/CN2RuleViewer.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/MDViewer.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/Mistral.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/apply_rules.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/category.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/dark_green.txt +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/de.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/document_generator.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/documents.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/endloop.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/extra_chunks.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/in_or_out.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/input.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/languages.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/lm_studio.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/local_interf_pull.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/local_interf_push.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/local_interf_text_pull.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/logo_solar.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/logo_uncensoredsolar.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/logo_upload.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/models.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/optimisation.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/optimizer.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/output.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owCN2_intersect_rules.svg +0 -0
- /aait-2.2.2.3/orangecontrib/AAIT/widgets/icons/owaccumulatator.png → /aait-2.2.3/orangecontrib/AAIT/widgets/icons/owaccumulator.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owchunking.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owconversellm.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owedittable.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owembeddings.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owemptyswitch.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owenvinfo.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owexecutescript.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owextracttokens.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owfilemetadata.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owfilewithpath.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owfusion_nm.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owkeywordsdetection.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owlemmatizer.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owmodel_ce_minilml6.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owmodel_falcon.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_en_fr.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_fr_en.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owmodel_mpnet.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_en.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_fr.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owoptimisation.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owoptimisationselection.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owqueryllm.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owreranking.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owsavefilepathentry.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owselectcolumndynamique.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owtable2corpus.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owtranslation.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/owtrigger.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/processdocuments.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/qwen-color.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/select_dynamic_row.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/startloop.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/tools.png +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/widgetFactory.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/AAIT/widgets/icons/zip.svg +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/orangecontrib/__init__.py +0 -0
- {aait-2.2.2.3 → aait-2.2.3}/setup.cfg +0 -0
aait-2.2.3/PKG-INFO
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aait
|
|
3
|
+
Version: 2.2.3
|
|
4
|
+
Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
|
|
5
|
+
Home-page:
|
|
6
|
+
Author: Orange community
|
|
7
|
+
Author-email:
|
|
8
|
+
Keywords: orange3 add-on
|
|
9
|
+
License-File: License.txt
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aait
|
|
3
|
+
Version: 2.2.3
|
|
4
|
+
Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
|
|
5
|
+
Home-page:
|
|
6
|
+
Author: Orange community
|
|
7
|
+
Author-email:
|
|
8
|
+
Keywords: orange3 add-on
|
|
9
|
+
License-File: License.txt
|
|
@@ -105,12 +105,10 @@ orangecontrib/AAIT/widgets/OWGenerateQuestions.py
|
|
|
105
105
|
orangecontrib/AAIT/widgets/OWGenerateSynthesis.py
|
|
106
106
|
orangecontrib/AAIT/widgets/OWGenerateWord.py
|
|
107
107
|
orangecontrib/AAIT/widgets/OWInputSelector.py
|
|
108
|
-
orangecontrib/AAIT/widgets/OWKeywords.py
|
|
109
108
|
orangecontrib/AAIT/widgets/OWKeywordsDetection.py
|
|
110
109
|
orangecontrib/AAIT/widgets/OWLMStudio.py
|
|
111
110
|
orangecontrib/AAIT/widgets/OWLanguageDetection.py
|
|
112
111
|
orangecontrib/AAIT/widgets/OWLemmatizer.py
|
|
113
|
-
orangecontrib/AAIT/widgets/OWLoadDocuments.py
|
|
114
112
|
orangecontrib/AAIT/widgets/OWModel_CE_MiniLML6.py
|
|
115
113
|
orangecontrib/AAIT/widgets/OWModel_Falcon.py
|
|
116
114
|
orangecontrib/AAIT/widgets/OWModel_HelsinkiEnFr.py
|
|
@@ -171,7 +169,6 @@ orangecontrib/AAIT/widgets/designer/owkeyword.ui
|
|
|
171
169
|
orangecontrib/AAIT/widgets/designer/owkeywordsdetection.ui
|
|
172
170
|
orangecontrib/AAIT/widgets/designer/owlangdetect.ui
|
|
173
171
|
orangecontrib/AAIT/widgets/designer/owlemmatizer.ui
|
|
174
|
-
orangecontrib/AAIT/widgets/designer/owloaddocuments.ui
|
|
175
172
|
orangecontrib/AAIT/widgets/designer/owloadworkflow.ui
|
|
176
173
|
orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui
|
|
177
174
|
orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui
|
|
@@ -230,7 +227,7 @@ orangecontrib/AAIT/widgets/icons/optimisation.png
|
|
|
230
227
|
orangecontrib/AAIT/widgets/icons/optimizer.png
|
|
231
228
|
orangecontrib/AAIT/widgets/icons/output.png
|
|
232
229
|
orangecontrib/AAIT/widgets/icons/owCN2_intersect_rules.svg
|
|
233
|
-
orangecontrib/AAIT/widgets/icons/
|
|
230
|
+
orangecontrib/AAIT/widgets/icons/owaccumulator.png
|
|
234
231
|
orangecontrib/AAIT/widgets/icons/owchunking.png
|
|
235
232
|
orangecontrib/AAIT/widgets/icons/owconversellm.svg
|
|
236
233
|
orangecontrib/AAIT/widgets/icons/owedittable.svg
|
|
@@ -246,10 +243,8 @@ orangecontrib/AAIT/widgets/icons/owfilewithpath.svg
|
|
|
246
243
|
orangecontrib/AAIT/widgets/icons/owfusion_nm.png
|
|
247
244
|
orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg
|
|
248
245
|
orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png
|
|
249
|
-
orangecontrib/AAIT/widgets/icons/owkeywords.png
|
|
250
246
|
orangecontrib/AAIT/widgets/icons/owkeywordsdetection.png
|
|
251
247
|
orangecontrib/AAIT/widgets/icons/owlemmatizer.svg
|
|
252
|
-
orangecontrib/AAIT/widgets/icons/owloaddocuments.svg
|
|
253
248
|
orangecontrib/AAIT/widgets/icons/owmodel_ce_minilml6.svg
|
|
254
249
|
orangecontrib/AAIT/widgets/icons/owmodel_falcon.png
|
|
255
250
|
orangecontrib/AAIT/widgets/icons/owmodel_helsinki_en_fr.svg
|
|
@@ -224,7 +224,7 @@ def write_tokens_to_file(token: str, workflow_id=""):
|
|
|
224
224
|
f.flush()
|
|
225
225
|
|
|
226
226
|
|
|
227
|
-
def run_query(prompt, model, max_tokens=4096, temperature=0, top_p=0, top_k=40, repeat_penalty=1.15,
|
|
227
|
+
def run_query(prompt, model, max_tokens=4096, temperature=0.4, top_p=0.4, top_k=40, repeat_penalty=1.15,
|
|
228
228
|
workflow_id="", argself=None, progress_callback=None):
|
|
229
229
|
"""
|
|
230
230
|
Generates a response from a local LLM model using the given prompt, with support for streaming output
|
|
@@ -9,31 +9,37 @@ import fitz
|
|
|
9
9
|
import docx
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
|
|
13
12
|
def process_documents(dirpath):
|
|
14
|
-
if dirpath is None or not
|
|
13
|
+
if dirpath is None or not Path(dirpath).exists():
|
|
15
14
|
return None, None
|
|
16
15
|
|
|
16
|
+
# Normalize dirpath
|
|
17
|
+
dirpath = Path(dirpath).resolve()
|
|
18
|
+
|
|
17
19
|
# get path from user selection
|
|
18
20
|
embeddings = check_for_embeddings(dirpath)
|
|
19
|
-
dirpath = dirpath.replace("\\","/")
|
|
20
21
|
|
|
21
22
|
# Set selected path in the saved embeddings
|
|
22
23
|
if embeddings is not None:
|
|
23
|
-
common_path = find_common_root(embeddings).
|
|
24
|
+
common_path = Path(find_common_root(embeddings)).resolve()
|
|
24
25
|
for row in embeddings:
|
|
25
|
-
|
|
26
|
+
row_path = Path(str(row["path"].value)).resolve()
|
|
27
|
+
# Replace common root with the current dirpath
|
|
28
|
+
try:
|
|
29
|
+
row["path"] = str(dirpath / row_path.relative_to(common_path))
|
|
30
|
+
except ValueError:
|
|
31
|
+
# If relative_to fails (paths not matching), just normalize
|
|
32
|
+
row["path"] = str(row_path)
|
|
26
33
|
|
|
27
34
|
# Verify which files are already processed
|
|
28
35
|
files_to_process = get_files_to_process(dirpath, embeddings)
|
|
29
36
|
|
|
30
37
|
rows = []
|
|
31
38
|
for file in files_to_process:
|
|
32
|
-
|
|
33
|
-
content = extract_text(file)
|
|
34
|
-
filename =
|
|
35
|
-
|
|
36
|
-
row = [file, filename, content]
|
|
39
|
+
file = Path(file).resolve()
|
|
40
|
+
content = extract_text(str(file)) # extractor may expect string
|
|
41
|
+
filename = file.name
|
|
42
|
+
row = [str(file), filename, content] # store strings in Orange table
|
|
37
43
|
rows.append(row)
|
|
38
44
|
|
|
39
45
|
# Build a table with the constructed rows
|
|
@@ -45,136 +51,115 @@ def process_documents(dirpath):
|
|
|
45
51
|
return out_data, embeddings
|
|
46
52
|
|
|
47
53
|
|
|
54
|
+
def check_for_embeddings(folder_path):
|
|
55
|
+
"""
|
|
56
|
+
Check for an embeddings.pkl file in a given folder. Return its content if it exists.
|
|
57
|
+
|
|
58
|
+
Parameters:
|
|
59
|
+
folder_path (str | Path): The path to the folder where embeddings.pkl may exist.
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Table or None: The content of embeddings.pkl, or None if not found.
|
|
63
|
+
"""
|
|
64
|
+
folder_path = Path(folder_path).resolve()
|
|
65
|
+
|
|
66
|
+
filepaths = [
|
|
67
|
+
folder_path / "embeddings_question.pkl",
|
|
68
|
+
folder_path / "embeddings.pkl"
|
|
69
|
+
]
|
|
70
|
+
|
|
71
|
+
for filepath in filepaths:
|
|
72
|
+
if filepath.exists():
|
|
73
|
+
return Table.from_file(str(filepath)) # Table.from_file expects a str
|
|
74
|
+
return None
|
|
75
|
+
|
|
76
|
+
|
|
48
77
|
def find_common_root(data_table, column_name="path"):
|
|
49
78
|
"""Finds the common root path from a column of file paths in an Orange Data Table."""
|
|
50
|
-
paths = [str(row[column_name])
|
|
79
|
+
paths = [Path(str(row[column_name].value)).resolve()
|
|
80
|
+
for row in data_table if row[column_name] is not None]
|
|
51
81
|
if not paths:
|
|
52
82
|
return ""
|
|
53
|
-
return os.path.commonpath(paths)
|
|
83
|
+
return str(Path(os.path.commonpath(paths)).resolve())
|
|
54
84
|
|
|
55
85
|
|
|
56
86
|
def get_files_to_process(folder_path, table=None):
|
|
57
87
|
"""
|
|
58
|
-
Finds all PDF files in a folder (including subfolders) that are not already in the table
|
|
59
|
-
|
|
88
|
+
Finds all PDF/DOCX files in a folder (including subfolders) that are not already in the table
|
|
89
|
+
or that have changed since last check (based on file size).
|
|
60
90
|
|
|
61
91
|
:param folder_path: Path to the folder to scan for documents.
|
|
62
|
-
:param table: Orange Data Table with
|
|
63
|
-
:return: List of
|
|
92
|
+
:param table: Orange Data Table with column "path".
|
|
93
|
+
:return: List of new/changed file paths.
|
|
64
94
|
"""
|
|
65
|
-
|
|
66
|
-
# Supported file extensions
|
|
67
|
-
supported_extensions = [".pdf", ".docx"]
|
|
95
|
+
supported_extensions = {".pdf", ".docx"}
|
|
68
96
|
|
|
69
|
-
|
|
70
|
-
filepath_sizes =
|
|
71
|
-
|
|
97
|
+
folder_path = Path(folder_path).resolve()
|
|
98
|
+
filepath_sizes = folder_path / "sizes.json"
|
|
99
|
+
|
|
100
|
+
print(filepath_sizes)
|
|
101
|
+
print(filepath_sizes.exists())
|
|
102
|
+
|
|
103
|
+
# Load previous file sizes
|
|
104
|
+
if filepath_sizes.exists():
|
|
72
105
|
with open(filepath_sizes, "r") as json_file:
|
|
73
|
-
sizes = json.load(json_file)
|
|
106
|
+
sizes = {Path(k): v for k, v in json.load(json_file).items()}
|
|
74
107
|
else:
|
|
75
|
-
sizes =
|
|
108
|
+
sizes = {}
|
|
109
|
+
|
|
110
|
+
print(sizes)
|
|
76
111
|
|
|
77
|
-
# Extract the existing
|
|
112
|
+
# Extract the existing paths from the Orange Data Table
|
|
78
113
|
if table:
|
|
79
|
-
|
|
114
|
+
# Orange stores metas as strings → turn them into Paths
|
|
115
|
+
existing_paths = {Path(str(p)).resolve() for p in table[:, "path"].metas.flatten()}
|
|
80
116
|
else:
|
|
81
117
|
existing_paths = set()
|
|
82
118
|
|
|
83
|
-
|
|
84
|
-
# Walk through the folder and its subfolders
|
|
85
119
|
new_files = []
|
|
86
|
-
for root, _, files in os.walk(folder_path):
|
|
87
|
-
for file in files:
|
|
88
|
-
# Check if the file has a supported extension
|
|
89
|
-
if os.path.splitext(file)[1].lower() in supported_extensions:
|
|
90
|
-
# Add the file if it is not already in the table
|
|
91
|
-
filepath = os.path.join(root, file).replace("\\","/")
|
|
92
|
-
if filepath not in existing_paths:
|
|
93
|
-
new_files.append(filepath)
|
|
94
|
-
sizes[filepath] = os.path.getsize(filepath)
|
|
95
|
-
# If the file is in the table, verify if the file has been modified (comparing the size)
|
|
96
|
-
else:
|
|
97
|
-
new_size = os.path.getsize(filepath)
|
|
98
|
-
if filepath not in sizes.keys():
|
|
99
|
-
sizes[filepath] = new_size
|
|
100
|
-
else:
|
|
101
|
-
old_size = sizes[filepath]
|
|
102
|
-
if old_size != new_size:
|
|
103
|
-
new_files.append(filepath)
|
|
104
|
-
table = remove_from_table(filepath, table)
|
|
105
|
-
sizes[filepath] = new_size
|
|
106
|
-
with open(filepath_sizes, "w") as json_file:
|
|
107
|
-
json.dump(sizes, json_file, indent=4)
|
|
108
|
-
return new_files
|
|
109
120
|
|
|
121
|
+
# Walk through the folder and subfolders
|
|
122
|
+
for file in folder_path.rglob("*"):
|
|
123
|
+
if file.suffix.lower() in supported_extensions:
|
|
124
|
+
file = file.resolve()
|
|
125
|
+
size = file.stat().st_size
|
|
110
126
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
127
|
+
if file not in existing_paths:
|
|
128
|
+
# New file
|
|
129
|
+
new_files.append(str(file))
|
|
130
|
+
sizes[Path(ntpath.basename(file))] = size
|
|
131
|
+
else:
|
|
132
|
+
# File already in table: check if size changed
|
|
133
|
+
print("File: ", Path(ntpath.basename(file)))
|
|
134
|
+
old_size = sizes.get(Path(ntpath.basename(file)))
|
|
135
|
+
print(old_size)
|
|
136
|
+
if old_size is None or old_size != size:
|
|
137
|
+
new_files.append(str(file))
|
|
138
|
+
table = remove_from_table(file, table)
|
|
139
|
+
sizes[Path(ntpath.basename(file))] = size
|
|
116
140
|
|
|
117
|
-
def check_for_embeddings(folder_path):
|
|
118
|
-
"""
|
|
119
|
-
Check for an embeddings.pkl file in a given folder. Return its content if it exists.
|
|
120
141
|
|
|
121
|
-
|
|
122
|
-
|
|
142
|
+
# Save updated sizes.json (keys must be strings for JSON)
|
|
143
|
+
with open(filepath_sizes, "w") as json_file:
|
|
144
|
+
json.dump({str(k): v for k, v in sizes.items()}, json_file, indent=4)
|
|
123
145
|
|
|
124
|
-
|
|
125
|
-
Table or None: The content of embeddings.pkl.
|
|
126
|
-
"""
|
|
127
|
-
filepaths = [os.path.join(folder_path, "embeddings_question.pkl"),
|
|
128
|
-
os.path.join(folder_path, "embeddings.pkl")]
|
|
129
|
-
for filepath in filepaths:
|
|
130
|
-
if os.path.exists(filepath):
|
|
131
|
-
data = Table.from_file(filepath)
|
|
132
|
-
return data
|
|
133
|
-
else:
|
|
134
|
-
return None
|
|
146
|
+
return new_files
|
|
135
147
|
|
|
136
148
|
|
|
137
|
-
def
|
|
149
|
+
def remove_from_table(filepath, table):
|
|
138
150
|
"""
|
|
139
|
-
|
|
140
|
-
as a new column "content".
|
|
141
|
-
|
|
142
|
-
:param table: Orange.data.Table containing file paths in a column named "path".
|
|
143
|
-
:return: Orange.data.Table with an added meta column "content" containing the extracted text.
|
|
151
|
+
Remove rows from the Orange table where 'path' matches the given filepath.
|
|
144
152
|
"""
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
# Get text and name
|
|
156
|
-
name = Path(filepath).name
|
|
157
|
-
text = extract_text(filepath)
|
|
158
|
-
# Store results
|
|
159
|
-
names.append(name)
|
|
160
|
-
texts.append(text)
|
|
161
|
-
# Update progress if a callback is provided
|
|
162
|
-
if progress_callback is not None:
|
|
163
|
-
progress_value = float(100 * (i + 1) / len(data))
|
|
164
|
-
progress_callback(progress_value)
|
|
165
|
-
# Check if processing should be stopped
|
|
166
|
-
if argself is not None and getattr(argself, "stop", False):
|
|
167
|
-
break
|
|
168
|
-
|
|
169
|
-
# Create a StringVariable for the new column
|
|
170
|
-
var_content = StringVariable("content")
|
|
171
|
-
var_name = StringVariable("name")
|
|
172
|
-
|
|
173
|
-
# Add the column as a meta-column in the table
|
|
174
|
-
data = data.add_column(variable=var_name, data=names, to_metas=True)
|
|
175
|
-
data = data.add_column(variable=var_content, data=texts, to_metas=True)
|
|
176
|
-
return data
|
|
177
|
-
|
|
153
|
+
filepath = Path(filepath).resolve()
|
|
154
|
+
|
|
155
|
+
filtered_table = Table.from_list(
|
|
156
|
+
domain=table.domain,
|
|
157
|
+
rows=[
|
|
158
|
+
row for row in table
|
|
159
|
+
if Path(str(row["path"].value)).resolve() != filepath
|
|
160
|
+
]
|
|
161
|
+
)
|
|
162
|
+
return filtered_table
|
|
178
163
|
|
|
179
164
|
|
|
180
165
|
def extract_text(filepath):
|
|
@@ -192,13 +177,11 @@ def extract_text(filepath):
|
|
|
192
177
|
return extract_text_from_pdf(filepath)
|
|
193
178
|
elif file_extension == ".docx":
|
|
194
179
|
return extract_text_from_docx(filepath)
|
|
195
|
-
elif file_extension in [".txt", ".md"]:
|
|
196
|
-
return extract_text_from_txt(filepath)
|
|
197
180
|
else:
|
|
198
|
-
|
|
181
|
+
raise ValueError("Format de fichier non supporté. Utilisez un fichier PDF ou DOCX.")
|
|
199
182
|
except Exception as e:
|
|
200
183
|
print(f"Erreur lors de l'extraction de texte depuis {filepath}: {e}")
|
|
201
|
-
return
|
|
184
|
+
return "Extraction Error"
|
|
202
185
|
|
|
203
186
|
|
|
204
187
|
def extract_text_from_pdf(pdf_path):
|
|
@@ -222,7 +205,7 @@ def extract_text_from_pdf(pdf_path):
|
|
|
222
205
|
return extracted_text
|
|
223
206
|
except Exception as e:
|
|
224
207
|
print(f"Erreur lors de l'extraction de texte depuis {pdf_path}: {e}")
|
|
225
|
-
return
|
|
208
|
+
return "Extraction Error"
|
|
226
209
|
|
|
227
210
|
|
|
228
211
|
def extract_text_from_docx(docx_path):
|
|
@@ -267,20 +250,13 @@ def extract_text_from_docx(docx_path):
|
|
|
267
250
|
row_text = [cell.text.strip() for cell in row.cells] # Extrait le texte de chaque cellule
|
|
268
251
|
table_text.append("\t".join(row_text)) # Sépare les colonnes par des tabulations
|
|
269
252
|
extracted_text.append("\n".join(table_text)) # Ajoute le tableau sous forme de texte
|
|
253
|
+
|
|
270
254
|
return "\n".join(filter(None, extracted_text)) # Retourne le texte en filtrant les vides
|
|
271
255
|
|
|
272
256
|
except Exception as e:
|
|
273
257
|
print(f"Erreur lors de l'extraction de texte depuis {docx_path}: {e}")
|
|
274
|
-
return
|
|
275
|
-
|
|
258
|
+
return "Extraction Error"
|
|
276
259
|
|
|
277
|
-
def extract_text_from_txt(filepath):
|
|
278
|
-
try:
|
|
279
|
-
with open(filepath, 'r', encoding='utf-8') as f:
|
|
280
|
-
return f.read()
|
|
281
|
-
except Exception as e:
|
|
282
|
-
print(f"Erreur lors de l'extraction de texte depuis {filepath}: {e}")
|
|
283
|
-
return f"ERROR: Extraction Error ({e})"
|
|
284
260
|
|
|
285
261
|
|
|
286
262
|
def get_pages_of_extract(pdf_path, extract):
|
|
@@ -1368,19 +1368,6 @@ def write_file_time(path):
|
|
|
1368
1368
|
except Exception as e:
|
|
1369
1369
|
print(f"Error creating .ok file: {e}")
|
|
1370
1370
|
return
|
|
1371
|
-
def write_file_arbitrary_time(path,time):
|
|
1372
|
-
time= int(time)
|
|
1373
|
-
time_ok=path[:-4]+".ok"
|
|
1374
|
-
reset_files([time_ok])
|
|
1375
|
-
with open(path, "w") as f:
|
|
1376
|
-
f.write(str(time))
|
|
1377
|
-
f.flush()
|
|
1378
|
-
try:
|
|
1379
|
-
with open(time_ok, "w"):
|
|
1380
|
-
pass
|
|
1381
|
-
except Exception as e:
|
|
1382
|
-
print(f"Error creating .ok file: {e}")
|
|
1383
|
-
return
|
|
1384
1371
|
|
|
1385
1372
|
def read_file_time(path):
|
|
1386
1373
|
time_ok = path[:-4] + ".ok"
|