aait 2.3.15.1__tar.gz → 2.3.15.990__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aait-2.3.15.1 → aait-2.3.15.990}/PKG-INFO +1 -1
- {aait-2.3.15.1 → aait-2.3.15.990}/aait.egg-info/PKG-INFO +1 -1
- {aait-2.3.15.1 → aait-2.3.15.990}/aait.egg-info/SOURCES.txt +4 -2
- aait-2.3.15.990/aait.egg-info/requires.txt +19 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/__init__.py +1 -2
- aait-2.3.15.990/orangecontrib/AAIT/llm/chunking.py +149 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/lemmes.py +2 -2
- aait-2.3.15.990/orangecontrib/AAIT/llm/resources/markdown_recipe.json +65 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/aait_repo_file.py +3 -3
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/subprocess_management.py +2 -2
- aait-2.3.15.990/orangecontrib/AAIT/utils/tools/__init__.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/windows_utils.py +2 -1
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWAAITResourcesManager.py +0 -3
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWApplyRules.py +1 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWChunking.py +9 -10
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWConverseLLM.py +1 -2
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWDisplayMD.py +0 -1
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWEditTable.py +3 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWEmptySwitch.py +2 -2
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWExtractTokens.py +2 -2
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWFileWithPath.py +4 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWFindFilesFromDir.py +1 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWGenerateWord.py +2 -7
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWGetPages.py +2 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWKeywordsDetection.py +122 -20
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWLLMEngine.py +4 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWLanguageDetection.py +2 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWOperationSystem.py +2 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWQuadrantclicker.py +0 -1
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWSaveFilepathEntry.py +3 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWSortAndSelect.py +1 -0
- aait-2.3.15.990/orangecontrib/AAIT/widgets/OWSplitExcelSheets.py +231 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owchunking.ui +17 -2
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owkeywordsdetection.ui +5 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owsplitexcelsheets.ui +5 -14
- aait-2.3.15.990/orangecontrib/AAIT/widgets/icons/splitexcelsheets.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/setup.py +15 -15
- aait-2.3.15.1/aait.egg-info/requires.txt +0 -19
- aait-2.3.15.1/orangecontrib/AAIT/fix_torch/fix_torch_dll_error.py +0 -25
- aait-2.3.15.1/orangecontrib/AAIT/fix_torch/libomp140.x86_64.dll +0 -0
- aait-2.3.15.1/orangecontrib/AAIT/llm/chunking.py +0 -150
- {aait-2.3.15.1 → aait-2.3.15.990}/License.txt +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/aait.egg-info/dependency_links.txt +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/aait.egg-info/entry_points.txt +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/aait.egg-info/namespace_packages.txt +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/aait.egg-info/top_level.txt +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/__init__.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/answers.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/answers_llama.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/embeddings.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/lmstudio.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/process_documents.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/prompt_management.py +0 -0
- /aait-2.3.15.1/orangecontrib/AAIT/optimiser/__init__.py → /aait-2.3.15.990/orangecontrib/AAIT/llm/resources/__ini__.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/llm/translations.py +0 -0
- {aait-2.3.15.1/orangecontrib/AAIT/utils → aait-2.3.15.990/orangecontrib/AAIT/optimiser}/__init__.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/optimiser/optuna_multi.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/MetManagement.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/OperationSystem.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/SimpleDialogQt.py +0 -0
- {aait-2.3.15.1/orangecontrib/AAIT/utils/tools → aait-2.3.15.990/orangecontrib/AAIT/utils}/__init__.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/aait_table_viewer.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/base_widget.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/delta_local_shared_fodler.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/import_uic.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/initialize_from_ini.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/mac_utils.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/shared_functions.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/shared_variables.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/thread_management.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/tools/change_owcorpus.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/tools/first_time_check.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/tools/owcorpus_ok.txt +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/unlink_table_domain.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/utils/widget_positioning.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWAccumulator.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWAddColumns.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWAutoShowCreateInstance.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWAutoShowTable.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWCN2rule_view.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWConcatRules.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWCreateEmbeddings.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWEndLoop.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWExecuteScript.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWExtraChunks.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWFileMetadata.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWFileSyncChecker.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWFusionNM.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWGenerateQuestions.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWGenerateSynthesis.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWInputSelector.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWKeywords.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWLMStudio.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWLargeLanguageModel.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWLemmatizer.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWLoadDocuments.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_CE_MiniLML6.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Embeddings.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Falcon.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_HelsinkiEnFr.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_HelsinkiFrEn.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_MPNET.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Mistral.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Qwen.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Qwen1B5_Q6.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Qwen2_5_32B.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Qwen3B_Q4.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q4.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q6.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_Solar.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_SolarUncensored.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_EN.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_FR.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWOptimisation.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWOptimisationSelection.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWOptimisationSendScore.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWProcessDocumentsFromPath.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWQueryLLM.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWRandomData.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWReranking.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWSelectColumnDynamique.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWStartLoop.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWTable2Corpus.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWTranslation.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWTrigger.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/OWUnzipFolder.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/POW_Wfactory.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/__init__.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owMarkdown.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/ow_OperationSystem.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/ow_widget_random_data.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owaccumulator.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owaddcolumns.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owapplyrules.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owconcatrules.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owconversellm.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owedgellm.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owembeddings.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owemptyswitch.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owendloop.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owexecutescript.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owexecutescript_TEST.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owextrachunks.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owextracttokens.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owfilemetadata.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owfilewithpath.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owfusion_nm.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owgenerate_word.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owgetpages.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owkeyword.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owlangdetect.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owlargelanguagemodel.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owlemmatizer.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owllm4all.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owloaddocuments.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owloadworkflow.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_embeddings.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_en_fr.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_fr_en.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_mistral.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_mpnet.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_qwen.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_2.5_32b.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_1.5b_q6.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_3b_q4.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q4.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q6.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_qwencoder_7b.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_solar.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_solar_uncensored.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_en.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_fr.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/ownumberpointinrules.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owprocessdocuments.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owquadrant_clicker.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owqueryllm.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owreranking.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owsavewithpath.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owselect_column_dynamic.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owsortandselect.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owstartloop.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owtable2corpus.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owtranslation.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owtrigger.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/designer/owunzipfolder.ui +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/CN2RuleViewer.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/CreateInstance.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/MDViewer.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/Mistral.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/Table.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/apply_rules.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/blue_down_arrow.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/book.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/category.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/dark_green.txt +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/de.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/document_generator.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/documents.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/edge_llm.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/endloop.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/extra_chunks.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/green_check.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/in_or_out.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/input.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/languages.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/lm_studio.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/local_interf_pull.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/local_interf_push.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/local_interf_text_pull.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/logo_solar.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/logo_uncensoredsolar.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/logo_upload.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/models.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/operationSystem.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/optimisation.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/optimizer.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/output.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owCN2_intersect_rules.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owaccumulator.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owchunking.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owconversellm.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owedittable.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owembeddings.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owemptyswitch.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owenvinfo.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owexecutescript.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owextracttokens.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owfilemetadata.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owfilewithpath.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owfusion_nm.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owkeywords.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owkeywordsdetection.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owlargelanguagemodel.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owlemmatizer.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owloaddocuments.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owmodel_ce_minilml6.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owmodel_embeddings.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owmodel_falcon.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_en_fr.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_fr_en.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owmodel_mpnet.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_en.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_fr.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owoptimisation.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owoptimisationselection.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owqueryllm.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owreranking.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owsavefilepathentry.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owselectcolumndynamique.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owsortandselect.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owtable2corpus.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owtranslation.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/owtrigger.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/processdocuments.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/quadrantclicker.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/qwen-color.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/select_dynamic_row.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/startloop.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/tools.png +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/widgetFactory.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/AAIT/widgets/icons/zip.svg +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/orangecontrib/__init__.py +0 -0
- {aait-2.3.15.1 → aait-2.3.15.990}/setup.cfg +0 -0
|
@@ -9,8 +9,6 @@ aait.egg-info/requires.txt
|
|
|
9
9
|
aait.egg-info/top_level.txt
|
|
10
10
|
orangecontrib/__init__.py
|
|
11
11
|
orangecontrib/AAIT/__init__.py
|
|
12
|
-
orangecontrib/AAIT/fix_torch/fix_torch_dll_error.py
|
|
13
|
-
orangecontrib/AAIT/fix_torch/libomp140.x86_64.dll
|
|
14
12
|
orangecontrib/AAIT/llm/__init__.py
|
|
15
13
|
orangecontrib/AAIT/llm/answers.py
|
|
16
14
|
orangecontrib/AAIT/llm/answers_llama.py
|
|
@@ -21,6 +19,8 @@ orangecontrib/AAIT/llm/lmstudio.py
|
|
|
21
19
|
orangecontrib/AAIT/llm/process_documents.py
|
|
22
20
|
orangecontrib/AAIT/llm/prompt_management.py
|
|
23
21
|
orangecontrib/AAIT/llm/translations.py
|
|
22
|
+
orangecontrib/AAIT/llm/resources/__ini__.py
|
|
23
|
+
orangecontrib/AAIT/llm/resources/markdown_recipe.json
|
|
24
24
|
orangecontrib/AAIT/optimiser/__init__.py
|
|
25
25
|
orangecontrib/AAIT/optimiser/optuna_multi.py
|
|
26
26
|
orangecontrib/AAIT/utils/MetManagement.py
|
|
@@ -112,6 +112,7 @@ orangecontrib/AAIT/widgets/OWSaveFilepathEntry.py
|
|
|
112
112
|
orangecontrib/AAIT/widgets/OWSelectColumnDynamique.py
|
|
113
113
|
orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py
|
|
114
114
|
orangecontrib/AAIT/widgets/OWSortAndSelect.py
|
|
115
|
+
orangecontrib/AAIT/widgets/OWSplitExcelSheets.py
|
|
115
116
|
orangecontrib/AAIT/widgets/OWStartLoop.py
|
|
116
117
|
orangecontrib/AAIT/widgets/OWTable2Corpus.py
|
|
117
118
|
orangecontrib/AAIT/widgets/OWTranslation.py
|
|
@@ -265,6 +266,7 @@ orangecontrib/AAIT/widgets/icons/processdocuments.svg
|
|
|
265
266
|
orangecontrib/AAIT/widgets/icons/quadrantclicker.svg
|
|
266
267
|
orangecontrib/AAIT/widgets/icons/qwen-color.png
|
|
267
268
|
orangecontrib/AAIT/widgets/icons/select_dynamic_row.png
|
|
269
|
+
orangecontrib/AAIT/widgets/icons/splitexcelsheets.png
|
|
268
270
|
orangecontrib/AAIT/widgets/icons/startloop.png
|
|
269
271
|
orangecontrib/AAIT/widgets/icons/tools.png
|
|
270
272
|
orangecontrib/AAIT/widgets/icons/widgetFactory.svg
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
torch
|
|
2
|
+
sentence-transformers
|
|
3
|
+
gpt4all[all]
|
|
4
|
+
sacremoses
|
|
5
|
+
transformers
|
|
6
|
+
sentencepiece
|
|
7
|
+
optuna
|
|
8
|
+
spacy
|
|
9
|
+
markdown
|
|
10
|
+
python-multipart
|
|
11
|
+
PyMuPDF
|
|
12
|
+
chonkie
|
|
13
|
+
GPUtil
|
|
14
|
+
unidecode
|
|
15
|
+
python-docx
|
|
16
|
+
psutil
|
|
17
|
+
thefuzz
|
|
18
|
+
beautifulsoup4
|
|
19
|
+
CATEGORIT
|
|
@@ -2,7 +2,7 @@ import Orange
|
|
|
2
2
|
from packaging import version
|
|
3
3
|
|
|
4
4
|
import sys
|
|
5
|
-
|
|
5
|
+
|
|
6
6
|
import os
|
|
7
7
|
# def check_executable_path():
|
|
8
8
|
# exe = sys.executable
|
|
@@ -52,7 +52,6 @@ else: # Execute the file
|
|
|
52
52
|
import os
|
|
53
53
|
import tempfile
|
|
54
54
|
import gc
|
|
55
|
-
import sys
|
|
56
55
|
from orangewidget.workflow.discovery import WidgetDiscovery
|
|
57
56
|
#from orangecanvas.registry import CategoryDescription
|
|
58
57
|
from orangecanvas.registry.utils import category_from_package_globals
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import re
|
|
3
|
+
import os
|
|
4
|
+
import pathlib
|
|
5
|
+
import Orange
|
|
6
|
+
from Orange.data import Domain, Table, StringVariable, ContinuousVariable
|
|
7
|
+
from chonkie import TokenChunker, SentenceChunker, RecursiveChunker, SemanticChunker, LateChunker
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def create_chunks(table, column_name, tokenizer="character", chunk_size=300, chunk_overlap=100, mode="Token", progress_callback=None, argself=None):
    """
    Chunk the text in `column_name` of an Orange Table using a chonkie chunker.

    Every input row is expanded into one output row per chunk. The original
    attributes, class variables and metas are copied onto each output row and
    six meta columns describing the chunk are appended.

    Parameters:
        table (Table): Input data table.
        column_name (str): Name of the text column to chunk.
        tokenizer (str): Tokenizer passed to the chunker (e.g., "character").
            In "Semantic" and "Late" modes it is passed as `embedding_model`.
        chunk_size (int): Target chunk size. Not used in "Markdown" mode,
            which uses a fixed size of 400.
        chunk_overlap (int): Overlap between chunks. Only used in "Token" and
            "Sentence" modes.
        mode (str): Chunking strategy: "Token", "Sentence", "Recursive",
            "Markdown", "Semantic" or "Late".
        progress_callback (callable): Optional. Called after each input row
            with the completion percentage as a float.
        argself: Optional caller reference. When its `stop` attribute is
            truthy, processing ends after the current row and the rows built
            so far are returned.

    Returns:
        Table: A new table whose metas are the input metas followed by
        "Chunks", "Chunks size", "Chunks index", "Chunks start", "Chunks end"
        and "Metadata" ("Metadata" is always filled with an empty string).

    Raises:
        ValueError: If `mode` is not one of the supported modes.
    """
    # Select the chunker matching the requested mode.
    if mode == "Token":
        chunker = TokenChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    elif mode == "Sentence":
        chunker = SentenceChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap, min_sentences_per_chunk=1)
    elif mode == "Recursive":
        chunker = RecursiveChunker(tokenizer=tokenizer, chunk_size=chunk_size, min_characters_per_chunk=24)

    ### The 3 following modes still have to be tested after the version upgrade!
    ### Beware of model handling: the tokenizer must be a SentenceTransformer (Mpnet or Qwen-0.6B).
    ### This should be handled upstream, in the widget.
    ### Per-method parameters still have to be added.
    elif mode == "Markdown":
        # Build the recipe path component by component so it resolves on every
        # OS; a literal "resources\\markdown_recipe.json" only works on Windows.
        recipe_path = pathlib.Path(__file__).parent.resolve() / "resources" / "markdown_recipe.json"
        # NOTE(review): chunk_size is fixed to 400 here and the `chunk_size`
        # argument is ignored - confirm this is intended.
        chunker = RecursiveChunker.from_recipe(path=str(recipe_path), tokenizer=tokenizer, chunk_size=400, min_characters_per_chunk=24)
    elif mode == "Semantic":
        chunker = SemanticChunker(embedding_model=tokenizer, threshold=0.7, chunk_size=chunk_size)
    elif mode == "Late":
        chunker = LateChunker(embedding_model=tokenizer, chunk_size=chunk_size, min_characters_per_chunk=24)
    else:
        raise ValueError(f"Invalid mode: {mode}. Valid modes are: Token, Sentence, Recursive, Markdown, Semantic, Late")

    # Output domain: same attributes and class variables, metas extended with
    # the chunk columns.
    attributes = table.domain.attributes
    class_vars = table.domain.class_vars
    new_metas = list(table.domain.metas) + [
        StringVariable("Chunks"),
        ContinuousVariable("Chunks size"),
        ContinuousVariable("Chunks index"),
        ContinuousVariable("Chunks start"),
        ContinuousVariable("Chunks end"),
        StringVariable("Metadata"),
    ]
    new_domain = Domain(attributes, class_vars, new_metas)

    total_rows = len(table)
    new_rows = []
    for i, row in enumerate(table):
        content = row[column_name].value
        chunks = chunker(content)
        # Values shared by every chunk of this row.
        base_values = [row[x] for x in attributes] + [row[y] for y in class_vars]
        row_metas = list(row.metas)
        for j, chunk in enumerate(chunks):
            # Previous metas followed by the chunk text, its size, its index
            # within the row, its start/end offsets and an empty "Metadata".
            new_metas_values = row_metas + [chunk.text, chunk.token_count, j, chunk.start_index, chunk.end_index, ""]
            new_rows.append(Orange.data.Instance(new_domain, base_values + new_metas_values))

        if progress_callback is not None:
            progress_callback(float(100 * (i + 1) / total_rows))
        # Cooperative cancellation requested by the caller.
        if argself is not None and argself.stop:
            break

    return Table.from_list(domain=new_domain, rows=new_rows)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
### Previous function - Metadatas were interesting
|
|
82
|
+
def chunk_markdown(content, tokenizer=None, chunk_size=300, chunk_overlap=100):
    """
    Split Markdown (or plain text) into chunks tagged with their heading path.

    - If ATX headings (#, ##, ### ...) are present, the text is split per
      section and every chunk carries the titles of its current heading
      branch only (parent titles down to the section's own title).
    - Text located before the first heading is kept and chunked with an
      empty metadata string.
    - If there is no heading at all, the whole content is handed to
      chunk_words().

    Parameters
    ----------
    content : str
        The content (Markdown or plain text).
    tokenizer : any
        Forwarded unchanged to chunk_words().
    chunk_size : int
        Forwarded to chunk_words(); per the original documentation, the
        maximum number of words per chunk.
    chunk_overlap : int
        Forwarded to chunk_words(); per the original documentation, the
        overlap (in words) between two consecutive chunks.

    Returns
    -------
    (chunks, metadatas) : tuple(list[str], list[str])
        chunks : text segments
        metadatas : heading hierarchy of each chunk (titles joined with
        " ; "), empty string when the chunk is under no heading.
        Both lists are empty when content is empty or not a string.
    """
    if not content or not isinstance(content, str):
        return [], []

    # "[ \t]+" rather than "\s+": "\s" also matches newlines, which let an
    # empty heading line ("#\n") swallow the following line as its title.
    header_regex = re.compile(r"^(#{1,6})[ \t]+(.*)", re.MULTILINE)
    matches = list(header_regex.finditer(content))

    # No heading: delegate directly to chunk_words
    # NOTE(review): chunk_words is defined elsewhere; it is assumed to return
    # a 2-tuple whose first item is the list of chunks - confirm.
    if not matches:
        chunks, _ = chunk_words(content, tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        return chunks, [""] * len(chunks)

    # With headings: collect (level, title, body) sections
    sections = []

    # Keep the text that precedes the first heading (level 0 = no heading)
    preamble = content[:matches[0].start()].strip()
    if preamble:
        sections.append((0, "", preamble))

    for i, match in enumerate(matches):
        # A section body runs up to the next heading, or to the end of content
        end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
        level = len(match.group(1))
        title = match.group(2).strip()
        body = content[match.end():end].strip()
        sections.append((level, title, body))

    chunks, metadatas = [], []
    current_titles = {}  # heading level -> title of the branch being walked

    for level, title, body in sections:
        if level:
            # A new heading closes every heading of the same or deeper level
            current_titles = {lvl: name for lvl, name in current_titles.items() if lvl < level}
            current_titles[level] = title

        # Empty titles ("## " with nothing after it) are left out of the path
        metadata = " ; ".join(current_titles[lvl] for lvl in sorted(current_titles) if current_titles[lvl])

        # Delegate the splitting of the section body to chunk_words
        body_chunks, _ = chunk_words(body, tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)

        chunks.extend(body_chunks)
        metadatas.extend([metadata] * len(body_chunks))

    return chunks, metadatas
|
|
147
|
+
|
|
148
|
+
def chunk_semantic():
    """Placeholder with no implementation yet: takes no argument, returns None."""
    return None
|
|
@@ -122,8 +122,8 @@ def lemmatize(text, model):
|
|
|
122
122
|
# Modify the infix patterns (patterns for token splits)
|
|
123
123
|
infixes = list(model.Defaults.infixes)
|
|
124
124
|
# Add custom pattern for numbers with special characters
|
|
125
|
-
infixes.append("(?=.*[A-Za-z])(?=.*\d)[A-Za-z\d]+(?:[-.#][A-Za-z\d]+)*")
|
|
126
|
-
infixes.append("[^\s]*")
|
|
125
|
+
infixes.append(r"(?=.*[A-Za-z])(?=.*\d)[A-Za-z\d]+(?:[-.#][A-Za-z\d]+)*")
|
|
126
|
+
infixes.append(r"[^\s]*")
|
|
127
127
|
# Recompile the infix pattern after adding the custom one
|
|
128
128
|
model.tokenizer.infix_finditer = compile_infix_regex(infixes).finditer
|
|
129
129
|
document = model(text)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "markdown",
|
|
3
|
+
"description": "Recipe for markdown documents in English",
|
|
4
|
+
"schema": "Markdown",
|
|
5
|
+
"language": "en",
|
|
6
|
+
"metadata": {
|
|
7
|
+
"version": "0.1.0",
|
|
8
|
+
"author": "Chonkie Team"
|
|
9
|
+
},
|
|
10
|
+
"recipe": {
|
|
11
|
+
"delimiters": [
|
|
12
|
+
". ",
|
|
13
|
+
"! ",
|
|
14
|
+
"? ",
|
|
15
|
+
"\n"
|
|
16
|
+
],
|
|
17
|
+
"include_delim": "prev",
|
|
18
|
+
"recursive_rules": {
|
|
19
|
+
"levels": [
|
|
20
|
+
{
|
|
21
|
+
"delimiters": [
|
|
22
|
+
"######",
|
|
23
|
+
"#####",
|
|
24
|
+
"####",
|
|
25
|
+
"###",
|
|
26
|
+
"##",
|
|
27
|
+
"#"
|
|
28
|
+
],
|
|
29
|
+
"whitespace": false,
|
|
30
|
+
"include_delim": "next"
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"delimiters": [
|
|
34
|
+
"\n\n",
|
|
35
|
+
"\n\r"
|
|
36
|
+
],
|
|
37
|
+
"whitespace": false,
|
|
38
|
+
"include_delim": "prev"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"delimiters": [
|
|
42
|
+
"\n",
|
|
43
|
+
"\r"
|
|
44
|
+
],
|
|
45
|
+
"whitespace": false,
|
|
46
|
+
"include_delim": "prev"
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"delimiters": [
|
|
50
|
+
". ",
|
|
51
|
+
"! ",
|
|
52
|
+
"? "
|
|
53
|
+
],
|
|
54
|
+
"whitespace": false,
|
|
55
|
+
"include_delim": "prev"
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"delimiters": null,
|
|
59
|
+
"whitespace": false,
|
|
60
|
+
"include_delim": "prev"
|
|
61
|
+
}
|
|
62
|
+
]
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
@@ -83,7 +83,7 @@ def create_index_file(in_repo_file,out_repo_file="a_ignorer"):
|
|
|
83
83
|
|
|
84
84
|
def decode(repo_file,file_to_read):
|
|
85
85
|
"""
|
|
86
|
-
|
|
86
|
+
be careful with big files (RAM saturation)
|
|
87
87
|
return the content of a zipped file
|
|
88
88
|
"""
|
|
89
89
|
if not os.path.isfile(repo_file):
|
|
@@ -144,8 +144,8 @@ def decode_to_file(zip_path, target_path, output_path):
|
|
|
144
144
|
def normalize_path(path):
|
|
145
145
|
"""
|
|
146
146
|
Normalize paths for URLs and local usage:
|
|
147
|
-
- Replaces backslashes
|
|
148
|
-
- Removes './' and '
|
|
147
|
+
- Replaces backslashes with forward slashes.
|
|
148
|
+
- Removes './' and '\\.' segments from paths.
|
|
149
149
|
- Handles redundant slashes.
|
|
150
150
|
"""
|
|
151
151
|
# Replace backslashes with slashes
|
|
@@ -9,7 +9,7 @@ import shlex
|
|
|
9
9
|
def open_hide_terminal(command, with_qt=True, env=None):
|
|
10
10
|
"""
|
|
11
11
|
Ouvre un nouveau terminal indépendant et exécute la commande spécifiée.
|
|
12
|
-
|
|
12
|
+
attention command est une liste
|
|
13
13
|
:param command: La liste de commande à exécuter.
|
|
14
14
|
:param with_qt: Désactive l'affichage si False (utile pour les applications Qt sans affichage).
|
|
15
15
|
:param hide_terminal: Masque la fenêtre du terminal si True.
|
|
@@ -102,7 +102,7 @@ def open_hide_terminal(command, with_qt=True, env=None):
|
|
|
102
102
|
|
|
103
103
|
def open_terminal(command, with_qt=True, env=None):
|
|
104
104
|
"""
|
|
105
|
-
|
|
105
|
+
attention command est une liste
|
|
106
106
|
Ouvre un nouveau terminal indépendant et exécute la commande spécifiée.
|
|
107
107
|
:param command: La liste de commande à exécuter.
|
|
108
108
|
:param with_qt: Désactive l'affichage si False (utile pour les applications Qt sans affichage).
|
|
File without changes
|
|
@@ -246,7 +246,8 @@ if os.name=='nt':
|
|
|
246
246
|
cb_after = EnumWindowsProc(_enum_after) # garder une référence !
|
|
247
247
|
user32.EnumWindows(cb_after, 0)
|
|
248
248
|
|
|
249
|
-
|
|
249
|
+
# peut etre a remettre (pyflakes warning)
|
|
250
|
+
#new_hwnds = list(set(hwnd_list_after) - set(hwnd_list_before))
|
|
250
251
|
|
|
251
252
|
# ... le reste de ton code (détection de la fenêtre, thread enforce_modal, parsing buffer) ...
|
|
252
253
|
buffer_content = ctypes.wstring_at(ctypes.addressof(file_buffer), file_buffer._length_)
|
|
@@ -11,18 +11,15 @@ from AnyQt.QtWidgets import (QComboBox, QDialog, QGroupBox, QHBoxLayout,
|
|
|
11
11
|
from Orange.widgets import widget
|
|
12
12
|
|
|
13
13
|
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
|
|
14
|
-
from Orange.widgets.orangecontrib.AAIT.fix_torch import fix_torch_dll_error
|
|
15
14
|
from Orange.widgets.orangecontrib.AAIT.utils import (MetManagement,
|
|
16
15
|
SimpleDialogQt)
|
|
17
16
|
from Orange.widgets.orangecontrib.AAIT.utils.MetManagement import GetFromRemote
|
|
18
17
|
from Orange.widgets.orangecontrib.AAIT.utils.initialize_from_ini import apply_modification_from_python_file
|
|
19
18
|
else:
|
|
20
|
-
from orangecontrib.AAIT.fix_torch import fix_torch_dll_error
|
|
21
19
|
from orangecontrib.AAIT.utils import (MetManagement,
|
|
22
20
|
SimpleDialogQt)
|
|
23
21
|
from orangecontrib.AAIT.utils.MetManagement import GetFromRemote
|
|
24
22
|
from orangecontrib.AAIT.utils.initialize_from_ini import apply_modification_from_python_file
|
|
25
|
-
fix_torch_dll_error.fix_error_torch()
|
|
26
23
|
|
|
27
24
|
class RepositoryManager(QDialog):
|
|
28
25
|
def __init__(self, parent=None):
|
|
@@ -33,12 +33,12 @@ class OWChunker(base_widget.BaseListWidget):
|
|
|
33
33
|
# Settings
|
|
34
34
|
chunk_size: str = Setting("300")
|
|
35
35
|
overlap: str = Setting("100")
|
|
36
|
-
mode: str = Setting("
|
|
36
|
+
mode: str = Setting("Token")
|
|
37
37
|
selected_column_name = Setting("content")
|
|
38
38
|
|
|
39
39
|
class Inputs:
|
|
40
40
|
data = Input("Data", Orange.data.Table)
|
|
41
|
-
model = Input("
|
|
41
|
+
model = Input("Tokenizer", SentenceTransformer, auto_summary=False)
|
|
42
42
|
|
|
43
43
|
class Outputs:
|
|
44
44
|
data = Output("Chunked Data", Orange.data.Table)
|
|
@@ -66,15 +66,15 @@ class OWChunker(base_widget.BaseListWidget):
|
|
|
66
66
|
self.setFixedHeight(490)
|
|
67
67
|
#uic.loadUi(self.gui, self)
|
|
68
68
|
|
|
69
|
-
|
|
69
|
+
# Chunking method
|
|
70
70
|
self.edit_mode = self.findChild(QComboBox, "comboBox")
|
|
71
|
-
self.edit_mode.
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
self.edit_mode.currentIndexChanged.connect(self.update_edit_mode)
|
|
71
|
+
self.edit_mode.setCurrentText(self.mode)
|
|
72
|
+
self.edit_mode.currentTextChanged.connect(self.update_edit_mode)
|
|
73
|
+
# Chunk size
|
|
75
74
|
self.edit_chunkSize = self.findChild(QLineEdit, 'chunkSize')
|
|
76
75
|
self.edit_chunkSize.setText(str(self.chunk_size))
|
|
77
76
|
self.edit_chunkSize.textChanged.connect(self.update_chunk_size)
|
|
77
|
+
# Chunk overlap
|
|
78
78
|
self.edit_overlap = self.findChild(QLineEdit, 'QLoverlap')
|
|
79
79
|
self.edit_overlap.setText(str(self.overlap))
|
|
80
80
|
self.edit_overlap.textChanged.connect(self.update_overlap)
|
|
@@ -98,9 +98,8 @@ class OWChunker(base_widget.BaseListWidget):
|
|
|
98
98
|
def update_overlap(self, text):
|
|
99
99
|
self.overlap = text
|
|
100
100
|
|
|
101
|
-
def update_edit_mode(self,
|
|
102
|
-
|
|
103
|
-
self.mode = selected
|
|
101
|
+
def update_edit_mode(self, text):
|
|
102
|
+
self.mode = text
|
|
104
103
|
|
|
105
104
|
def run(self):
|
|
106
105
|
self.error("")
|
|
@@ -88,8 +88,7 @@ class OWConverseLLM(widget.OWWidget):
|
|
|
88
88
|
def __init__(self):
|
|
89
89
|
super().__init__()
|
|
90
90
|
# Qt Management
|
|
91
|
-
self.
|
|
92
|
-
self.setFixedHeight(750)
|
|
91
|
+
self.resize(840, 750)
|
|
93
92
|
uic.loadUi(self.gui, self)
|
|
94
93
|
# Context
|
|
95
94
|
self.lineEdit_n_ctx = self.findChild(QLineEdit, "lineEdit")
|
|
@@ -38,7 +38,6 @@ from orangecanvas.gui.utils import disconnected
|
|
|
38
38
|
from orangewidget.utils.listview import ListViewSearch
|
|
39
39
|
|
|
40
40
|
from orangecontrib.text.corpus import Corpus
|
|
41
|
-
from Orange.widgets.settings import Setting
|
|
42
41
|
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
|
|
43
42
|
from Orange.widgets.orangecontrib.AAIT.utils import widget_positioning
|
|
44
43
|
else:
|
|
@@ -29,11 +29,11 @@ class OWEmptySwitch(widget.OWWidget):
|
|
|
29
29
|
priority = 1060
|
|
30
30
|
|
|
31
31
|
class Inputs:
|
|
32
|
-
data = Input("Data",
|
|
32
|
+
data = Input("Data", object)
|
|
33
33
|
|
|
34
34
|
class Outputs:
|
|
35
35
|
data = Output("Data", Orange.data.Table)
|
|
36
|
-
trigger = Output("Trigger",
|
|
36
|
+
trigger = Output("Trigger", object)
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
@Inputs.data
|
|
@@ -8,11 +8,9 @@ from Orange.widgets.utils.signals import Input, Output
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
|
|
11
|
-
from Orange.widgets.orangecontrib.AAIT.utils.import_uic import uic
|
|
12
11
|
from Orange.widgets.orangecontrib.AAIT.utils import base_widget
|
|
13
12
|
from Orange.widgets.orangecontrib.AAIT.utils.initialize_from_ini import apply_modification_from_python_file
|
|
14
13
|
else:
|
|
15
|
-
from orangecontrib.AAIT.utils.import_uic import uic
|
|
16
14
|
from orangecontrib.AAIT.utils.initialize_from_ini import apply_modification_from_python_file
|
|
17
15
|
from orangecontrib.AAIT.utils import base_widget
|
|
18
16
|
|
|
@@ -68,6 +66,8 @@ class OWExtractTokens(base_widget.BaseListWidget):
|
|
|
68
66
|
|
|
69
67
|
if not self.selected_column_name in self.data.domain:
|
|
70
68
|
self.warning(f'Previously selected column "{self.selected_column_name}" does not exist in your data.')
|
|
69
|
+
self.Outputs.data_row.send(None)
|
|
70
|
+
self.Outputs.data_column.send(None)
|
|
71
71
|
return
|
|
72
72
|
|
|
73
73
|
if not hasattr(self.data, "tokens"):
|
|
@@ -150,6 +150,7 @@ class OWFileWithPath(base_widget.BaseListWidget):
|
|
|
150
150
|
|
|
151
151
|
|
|
152
152
|
if self.filepath is None:
|
|
153
|
+
self.Outputs.data.send(None)
|
|
153
154
|
return
|
|
154
155
|
|
|
155
156
|
self.filepath = self.filepath.strip('"')
|
|
@@ -161,10 +162,12 @@ class OWFileWithPath(base_widget.BaseListWidget):
|
|
|
161
162
|
# Verification of in_data
|
|
162
163
|
if not self.selected_column_name in self.in_path_table.domain:
|
|
163
164
|
self.warning(f'Previously selected column "{self.selected_column_name}" does not exist in your data.')
|
|
165
|
+
self.Outputs.data.send(None)
|
|
164
166
|
return
|
|
165
167
|
|
|
166
168
|
if not isinstance(self.in_path_table.domain[self.selected_column_name], StringVariable):
|
|
167
169
|
self.error('You must select a text variable.')
|
|
170
|
+
self.Outputs.data.send(None)
|
|
168
171
|
return
|
|
169
172
|
|
|
170
173
|
if self.strloadAsString!="False":
|
|
@@ -175,6 +178,7 @@ class OWFileWithPath(base_widget.BaseListWidget):
|
|
|
175
178
|
out_data.name = self.filepath
|
|
176
179
|
self.Outputs.data.send(out_data)
|
|
177
180
|
|
|
181
|
+
|
|
178
182
|
def post_initialized(self):
|
|
179
183
|
pass
|
|
180
184
|
|
|
@@ -306,6 +306,7 @@ class OWFileFromDir(base_widget.BaseListWidget):
|
|
|
306
306
|
|
|
307
307
|
if not self.selected_column_name in self.data.domain:
|
|
308
308
|
self.warning(f'Previously selected column "{self.selected_column_name}" does not exist in your data.')
|
|
309
|
+
self.Outputs.data.send(None)
|
|
309
310
|
return
|
|
310
311
|
|
|
311
312
|
self.folderpath = self.data.get_column(self.selected_column_name)
|
|
@@ -1,22 +1,17 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import sys
|
|
3
3
|
import re
|
|
4
|
-
import gc
|
|
5
|
-
from datetime import datetime
|
|
6
4
|
from typing import Optional,Dict,List,Tuple
|
|
7
5
|
from AnyQt.QtWidgets import QTableWidgetItem
|
|
8
6
|
|
|
9
7
|
from Orange.widgets.settings import Setting
|
|
10
8
|
import Orange.data
|
|
11
|
-
from Orange.data import Table
|
|
12
|
-
from AnyQt.QtWidgets import QApplication
|
|
9
|
+
from Orange.data import Table
|
|
10
|
+
from AnyQt.QtWidgets import QApplication
|
|
13
11
|
from Orange.widgets import widget
|
|
14
|
-
from Orange.widgets.widget import Input, MultiInput, Output
|
|
15
12
|
|
|
16
13
|
# Import des bibliothèques Word
|
|
17
14
|
from docx import Document
|
|
18
|
-
from docx.enum.style import WD_STYLE_TYPE
|
|
19
|
-
from docx.shared import Pt
|
|
20
15
|
from docx.oxml.ns import qn
|
|
21
16
|
from docx.oxml import OxmlElement
|
|
22
17
|
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
|
|
@@ -105,10 +105,12 @@ class OWGetPages(OWWidget):
|
|
|
105
105
|
self.error(None)
|
|
106
106
|
if not "path" in self.data.domain:
|
|
107
107
|
self.error('You don\'t have "path" column in your input data.')
|
|
108
|
+
self.Outputs.data.send(None)
|
|
108
109
|
return
|
|
109
110
|
|
|
110
111
|
if not "Chunks" in self.data.domain:
|
|
111
112
|
self.error('You don\'t have "Chunks" column in your input data.')
|
|
113
|
+
self.Outputs.data.send(None)
|
|
112
114
|
return
|
|
113
115
|
pages = []
|
|
114
116
|
for row in self.data:
|