aait 2.3.15.995__tar.gz → 2.3.15.997__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aait-2.3.15.997/PKG-INFO +8 -0
- aait-2.3.15.997/aait.egg-info/PKG-INFO +8 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/aait.egg-info/SOURCES.txt +5 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/__init__.py +40 -3
- aait-2.3.15.997/orangecontrib/AAIT/llm/chunking.py +113 -0
- aait-2.3.15.997/orangecontrib/AAIT/llm/wordchunker_deprecated.py +333 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/MetManagement.py +42 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/base_widget.py +11 -3
- aait-2.3.15.997/orangecontrib/AAIT/utils/tools/TigerODM_notepad.py +1 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWChunking.py +17 -7
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWConverseLLM.py +2 -1
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWLLMEngine.py +16 -6
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWLargeLanguageModel.py +10 -6
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWLoadDocuments.py +1 -1
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Embeddings.py +12 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Falcon.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Mistral.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Qwen.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Qwen1B5_Q6.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Qwen2_5_32B.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Qwen3B_Q4.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q4.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q6.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_Solar.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_SolarUncensored.py +7 -4
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWSplitExcelSheets.py +1 -1
- aait-2.3.15.997/orangecontrib/AAIT/widgets/OWSplitPath.py +135 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owchunking.ui +8 -3
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owloaddocuments.ui +1 -1
- aait-2.3.15.997/orangecontrib/AAIT/widgets/designer/owsplitpath.ui +131 -0
- aait-2.3.15.997/orangecontrib/AAIT/widgets/icons/split_path.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/setup.py +1 -1
- aait-2.3.15.995/PKG-INFO +0 -32
- aait-2.3.15.995/aait.egg-info/PKG-INFO +0 -32
- aait-2.3.15.995/orangecontrib/AAIT/llm/chunking.py +0 -149
- {aait-2.3.15.995 → aait-2.3.15.997}/aait.egg-info/dependency_links.txt +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/aait.egg-info/entry_points.txt +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/aait.egg-info/namespace_packages.txt +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/aait.egg-info/requires.txt +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/aait.egg-info/top_level.txt +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/__init__.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/answers.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/answers_llama.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/embeddings.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/lemmes.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/lmstudio.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/process_documents.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/prompt_management.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/resources/__ini__.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/resources/markdown_recipe.json +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/llm/translations.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/optimiser/__init__.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/optimiser/optuna_multi.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/OperationSystem.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/SimpleDialogQt.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/__init__.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/aait_repo_file.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/aait_table_viewer.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/delta_local_shared_fodler.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/import_uic.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/initialize_from_ini.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/mac_utils.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/shared_functions.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/shared_variables.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/subprocess_management.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/thread_management.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/tools/__init__.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/tools/change_owcorpus.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/tools/first_time_check.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/tools/owcorpus_ok.txt +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/unlink_table_domain.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/widget_positioning.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/utils/windows_utils.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWAAITResourcesManager.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWAccumulator.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWAddColumns.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWApplyRules.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWAutoShowCreateInstance.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWAutoShowTable.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWCN2rule_view.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWConcatRules.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWCreateEmbeddings.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWDisplayMD.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWEditTable.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWEmptySwitch.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWEndLoop.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWExecuteScript.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWExtraChunks.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWExtractTokens.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWFileMetadata.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWFileSyncChecker.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWFileWithPath.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWFindFilesFromDir.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWFusionNM.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWGenerateQuestions.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWGenerateSynthesis.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWGenerateWord.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWGetPages.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWInputSelector.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWKeywords.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWKeywordsDetection.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWLMStudio.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWLanguageDetection.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWLemmatizer.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_CE_MiniLML6.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_HelsinkiEnFr.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_HelsinkiFrEn.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_MPNET.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_EN.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_FR.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWOperationSystem.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWOptimisation.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWOptimisationSelection.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWOptimisationSendScore.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWProcessDocumentsFromPath.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWQuadrantclicker.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWQueryLLM.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWRandomData.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWReranking.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWSaveFilepathEntry.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWSelectColumnDynamique.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWSortAndSelect.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWStartLoop.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWTable2Corpus.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWTranslation.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWTrigger.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/OWUnzipFolder.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/POW_Wfactory.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/__init__.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owMarkdown.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/ow_OperationSystem.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/ow_widget_random_data.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owaccumulator.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owaddcolumns.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owapplyrules.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owconcatrules.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owconversellm.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owedgellm.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owembeddings.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owemptyswitch.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owendloop.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owexecutescript.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owexecutescript_TEST.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owextrachunks.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owextracttokens.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owfilemetadata.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owfilewithpath.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owfusion_nm.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owgenerate_word.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owgetpages.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owkeyword.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owkeywordsdetection.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owlangdetect.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owlargelanguagemodel.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owlemmatizer.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owllm4all.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owloadworkflow.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_embeddings.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_en_fr.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_fr_en.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_mistral.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_mpnet.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_qwen.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_2.5_32b.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_1.5b_q6.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_3b_q4.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q4.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q6.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_qwencoder_7b.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_solar.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_solar_uncensored.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_en.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_fr.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/ownumberpointinrules.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owprocessdocuments.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owquadrant_clicker.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owqueryllm.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owreranking.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owsavewithpath.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owselect_column_dynamic.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owsortandselect.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owsplitexcelsheets.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owstartloop.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owtable2corpus.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owtranslation.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owtrigger.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/designer/owunzipfolder.ui +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/CN2RuleViewer.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/CreateInstance.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/MDViewer.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/Mistral.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/Table.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/apply_rules.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/blue_down_arrow.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/book.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/category.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/dark_green.txt +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/de.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/document_generator.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/documents.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/edge_llm.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/endloop.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/extra_chunks.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/green_check.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/in_or_out.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/input.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/languages.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/lm_studio.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/local_interf_pull.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/local_interf_push.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/local_interf_text_pull.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/logo_solar.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/logo_uncensoredsolar.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/logo_upload.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/models.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/operationSystem.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/optimisation.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/optimizer.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/output.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owCN2_intersect_rules.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owaccumulator.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owchunking.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owconversellm.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owedittable.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owembeddings.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owemptyswitch.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owenvinfo.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owexecutescript.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owextracttokens.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owfilemetadata.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owfilewithpath.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owfusion_nm.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owkeywords.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owkeywordsdetection.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owlargelanguagemodel.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owlemmatizer.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owloaddocuments.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owmodel_ce_minilml6.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owmodel_embeddings.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owmodel_falcon.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_en_fr.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_fr_en.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owmodel_mpnet.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_en.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_fr.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owoptimisation.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owoptimisationselection.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owqueryllm.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owreranking.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owsavefilepathentry.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owselectcolumndynamique.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owsortandselect.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owtable2corpus.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owtranslation.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/owtrigger.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/processdocuments.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/quadrantclicker.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/qwen-color.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/select_dynamic_row.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/splitexcelsheets.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/startloop.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/tools.png +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/widgetFactory.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/AAIT/widgets/icons/zip.svg +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/orangecontrib/__init__.py +0 -0
- {aait-2.3.15.995 → aait-2.3.15.997}/setup.cfg +0 -0
aait-2.3.15.997/PKG-INFO
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aait
|
|
3
|
+
Version: 2.3.15.997
|
|
4
|
+
Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
|
|
5
|
+
Home-page:
|
|
6
|
+
Author: Orange community
|
|
7
|
+
Author-email:
|
|
8
|
+
Keywords: orange3 add-on
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aait
|
|
3
|
+
Version: 2.3.15.997
|
|
4
|
+
Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
|
|
5
|
+
Home-page:
|
|
6
|
+
Author: Orange community
|
|
7
|
+
Author-email:
|
|
8
|
+
Keywords: orange3 add-on
|
|
@@ -18,6 +18,7 @@ orangecontrib/AAIT/llm/lmstudio.py
|
|
|
18
18
|
orangecontrib/AAIT/llm/process_documents.py
|
|
19
19
|
orangecontrib/AAIT/llm/prompt_management.py
|
|
20
20
|
orangecontrib/AAIT/llm/translations.py
|
|
21
|
+
orangecontrib/AAIT/llm/wordchunker_deprecated.py
|
|
21
22
|
orangecontrib/AAIT/llm/resources/__ini__.py
|
|
22
23
|
orangecontrib/AAIT/llm/resources/markdown_recipe.json
|
|
23
24
|
orangecontrib/AAIT/optimiser/__init__.py
|
|
@@ -40,6 +41,7 @@ orangecontrib/AAIT/utils/thread_management.py
|
|
|
40
41
|
orangecontrib/AAIT/utils/unlink_table_domain.py
|
|
41
42
|
orangecontrib/AAIT/utils/widget_positioning.py
|
|
42
43
|
orangecontrib/AAIT/utils/windows_utils.py
|
|
44
|
+
orangecontrib/AAIT/utils/tools/TigerODM_notepad.py
|
|
43
45
|
orangecontrib/AAIT/utils/tools/__init__.py
|
|
44
46
|
orangecontrib/AAIT/utils/tools/change_owcorpus.py
|
|
45
47
|
orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py
|
|
@@ -112,6 +114,7 @@ orangecontrib/AAIT/widgets/OWSelectColumnDynamique.py
|
|
|
112
114
|
orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py
|
|
113
115
|
orangecontrib/AAIT/widgets/OWSortAndSelect.py
|
|
114
116
|
orangecontrib/AAIT/widgets/OWSplitExcelSheets.py
|
|
117
|
+
orangecontrib/AAIT/widgets/OWSplitPath.py
|
|
115
118
|
orangecontrib/AAIT/widgets/OWStartLoop.py
|
|
116
119
|
orangecontrib/AAIT/widgets/OWTable2Corpus.py
|
|
117
120
|
orangecontrib/AAIT/widgets/OWTranslation.py
|
|
@@ -182,6 +185,7 @@ orangecontrib/AAIT/widgets/designer/owselect_column_dynamic.ui
|
|
|
182
185
|
orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui
|
|
183
186
|
orangecontrib/AAIT/widgets/designer/owsortandselect.ui
|
|
184
187
|
orangecontrib/AAIT/widgets/designer/owsplitexcelsheets.ui
|
|
188
|
+
orangecontrib/AAIT/widgets/designer/owsplitpath.ui
|
|
185
189
|
orangecontrib/AAIT/widgets/designer/owstartloop.ui
|
|
186
190
|
orangecontrib/AAIT/widgets/designer/owtable2corpus.ui
|
|
187
191
|
orangecontrib/AAIT/widgets/designer/owtranslation.ui
|
|
@@ -265,6 +269,7 @@ orangecontrib/AAIT/widgets/icons/processdocuments.svg
|
|
|
265
269
|
orangecontrib/AAIT/widgets/icons/quadrantclicker.svg
|
|
266
270
|
orangecontrib/AAIT/widgets/icons/qwen-color.png
|
|
267
271
|
orangecontrib/AAIT/widgets/icons/select_dynamic_row.png
|
|
272
|
+
orangecontrib/AAIT/widgets/icons/split_path.png
|
|
268
273
|
orangecontrib/AAIT/widgets/icons/splitexcelsheets.png
|
|
269
274
|
orangecontrib/AAIT/widgets/icons/startloop.png
|
|
270
275
|
orangecontrib/AAIT/widgets/icons/tools.png
|
|
@@ -42,11 +42,11 @@ if not check_executable_length():
|
|
|
42
42
|
exit(0)
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
target_version = version.parse("3.
|
|
45
|
+
target_version = version.parse("3.40")
|
|
46
46
|
current_version = version.parse(Orange.version.version)
|
|
47
47
|
if current_version < target_version: # Skip the file
|
|
48
48
|
print("Orange version not compatible with all of AAIT functions !")
|
|
49
|
-
|
|
49
|
+
SimpleDialogQt.BoxError("Orange version not compatible with all of AAIT functions !")
|
|
50
50
|
|
|
51
51
|
else: # Execute the file
|
|
52
52
|
import os
|
|
@@ -330,4 +330,41 @@ else: # Execute the file
|
|
|
330
330
|
import_proprietary_categories()
|
|
331
331
|
duplicate_widget_if_needed_exept_POW_file()
|
|
332
332
|
|
|
333
|
-
duplicate_POW_file()
|
|
333
|
+
duplicate_POW_file()
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
from AnyQt import QtWidgets, QtGui, QtCore
|
|
338
|
+
|
|
339
|
+
def force_native_light_mode():
|
|
340
|
+
# 1. Get the current application instance
|
|
341
|
+
app = QtWidgets.QApplication.instance()
|
|
342
|
+
if not app:
|
|
343
|
+
return
|
|
344
|
+
|
|
345
|
+
# 2. Force the style back to 'Windows' or 'WindowsVista'
|
|
346
|
+
# This prevents Orange from using any custom Dark-specific styles
|
|
347
|
+
app.setStyle(QtWidgets.QStyleFactory.create("WindowsVista"))
|
|
348
|
+
|
|
349
|
+
# 3. Explicitly tell Qt to use the Light Palette
|
|
350
|
+
# We fetch the 'Standard' palette which defaults to Light colors
|
|
351
|
+
light_palette = QtGui.QPalette()
|
|
352
|
+
|
|
353
|
+
# Manually re-assert the Light Mode colors to override OS injection
|
|
354
|
+
light_palette.setColor(QtGui.QPalette.Window, QtGui.QColor(240, 240, 240))
|
|
355
|
+
light_palette.setColor(QtGui.QPalette.WindowText, QtCore.Qt.black)
|
|
356
|
+
light_palette.setColor(QtGui.QPalette.Base, QtCore.Qt.white)
|
|
357
|
+
light_palette.setColor(QtGui.QPalette.AlternateBase, QtGui.QColor(233, 233, 233))
|
|
358
|
+
light_palette.setColor(QtGui.QPalette.ToolTipBase, QtCore.Qt.white)
|
|
359
|
+
light_palette.setColor(QtGui.QPalette.ToolTipText, QtCore.Qt.black)
|
|
360
|
+
light_palette.setColor(QtGui.QPalette.Text, QtCore.Qt.black)
|
|
361
|
+
light_palette.setColor(QtGui.QPalette.Button, QtGui.QColor(240, 240, 240))
|
|
362
|
+
light_palette.setColor(QtGui.QPalette.ButtonText, QtCore.Qt.black)
|
|
363
|
+
light_palette.setColor(QtGui.QPalette.BrightText, QtCore.Qt.red)
|
|
364
|
+
light_palette.setColor(QtGui.QPalette.Link, QtGui.QColor(0, 0, 255))
|
|
365
|
+
light_palette.setColor(QtGui.QPalette.Highlight, QtGui.QColor(0, 120, 215))
|
|
366
|
+
light_palette.setColor(QtGui.QPalette.HighlightedText, QtCore.Qt.white)
|
|
367
|
+
|
|
368
|
+
app.setPalette(light_palette)
|
|
369
|
+
|
|
370
|
+
force_native_light_mode()
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import re
|
|
3
|
+
import os
|
|
4
|
+
import pathlib
|
|
5
|
+
import Orange
|
|
6
|
+
from Orange.data import Domain, Table, StringVariable, ContinuousVariable
|
|
7
|
+
from sentence_transformers import SentenceTransformer
|
|
8
|
+
|
|
9
|
+
### Chonkie
|
|
10
|
+
from chonkie import TokenChunker, SentenceChunker, RecursiveChunker, SemanticChunker, LateChunker, CodeChunker
|
|
11
|
+
if "site-packages/Orange/widgets" in os.path.dirname(os.path.abspath(__file__)).replace("\\", "/"):
|
|
12
|
+
from Orange.widgets.orangecontrib.AAIT.llm import wordchunker_deprecated
|
|
13
|
+
from Orange.widgets.orangecontrib.AAIT.utils.MetManagement import get_local_store_path
|
|
14
|
+
else:
|
|
15
|
+
from orangecontrib.AAIT.llm import wordchunker_deprecated
|
|
16
|
+
from orangecontrib.AAIT.utils.MetManagement import get_local_store_path
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def create_chunks(table, column_name, tokenizer="character", chunk_size=300, chunk_overlap=100, mode="tokens",
                  progress_callback=None, argself=None):
    """
    Chunk the text in `column_name` of an Orange Table using a specialized chunker.

    Every input row is split into chunks by the chunker selected with `mode`,
    and the output table contains one row per chunk: the original attributes,
    class values and metas of the source row, followed by the chunk columns.

    Parameters:
        table (Table): Input data table.
        column_name (str): Name of the text column to chunk.
        tokenizer (str): Currently ignored; it is overridden with "character"
            (a notice is printed on every call).
        chunk_size (int): Target chunk size.
        chunk_overlap (int): Overlap between chunks (only passed to the
            "tokens", "words" and "sentence" chunkers).
        mode (str): Chunking strategy. One of "tokens", "words", "sentence",
            "markdown", "Recursive", "Semantic", "Late", "Code"
            (the comparison is case-sensitive).
        progress_callback (callable): Optional progress reporter; called after
            each input row with a percentage in (0, 100].
        argself: Optional caller reference; when its `stop` attribute is truthy
            after a row has been processed, the loop ends early and the rows
            built so far are returned.

    Returns:
        Table: A table with the added meta columns "Chunks", "Chunks size",
        "Chunks index", "Chunks start", "Chunks end" and "Metadata".

    Raises:
        ValueError: If `mode` is not one of the supported values.
    """
    print("This widget is being updated : default tokenizer 'character' enabled for compatibility !!")
    tokenizer = "character"

    # Select the chunker according to the mode
    if mode == "tokens":
        chunker = TokenChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    elif mode == "words":
        path_ugly = os.path.join(get_local_store_path(), "Models", "NLP", "all-mpnet-base-v2")
        tokenizer_word = SentenceTransformer(path_ugly, device="cpu")
        chunker = wordchunker_deprecated.WordChunker(tokenizer=tokenizer_word, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    elif mode == "sentence":
        chunker = SentenceChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap,
                                  min_sentences_per_chunk=1)
    elif mode == "markdown":
        markdown_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "resources/markdown_recipe.json")
        chunker = RecursiveChunker.from_recipe(path=markdown_path,
                                               tokenizer=tokenizer,
                                               chunk_size=chunk_size,
                                               min_characters_per_chunk=1)

    # TODO: expose these parameters in the .ui file
    # Needs "rules" to chunk differently from Token or Sentence
    elif mode == "Recursive":
        chunker = RecursiveChunker(tokenizer=tokenizer, chunk_size=chunk_size, min_characters_per_chunk=24)
    # To be tested before exposing the feature in the UI
    # An embedding model is REQUIRED!
    elif mode == "Semantic":
        chunker = SemanticChunker(embedding_model=tokenizer, threshold=0.7, chunk_size=chunk_size, similarity_window=3)
    # To be tested before exposing the feature in the UI
    # An embedding model is REQUIRED!
    elif mode == "Late":
        chunker = LateChunker(embedding_model=tokenizer, chunk_size=chunk_size, min_characters_per_chunk=24)
    elif mode == "Code":
        chunker = CodeChunker("blabla")
    else:
        # List the mode strings that are actually accepted above.
        raise ValueError(
            f"Invalid mode: {mode}. Valid modes are: tokens, words, sentence, markdown, "
            "Recursive, Semantic, Late, Code"
        )

    new_metas = list(table.domain.metas) + [StringVariable("Chunks"),
                                            ContinuousVariable("Chunks size"),
                                            ContinuousVariable("Chunks index"),
                                            ContinuousVariable("Chunks start"),
                                            ContinuousVariable("Chunks end"),
                                            StringVariable("Metadata")]
    new_domain = Domain(table.domain.attributes, table.domain.class_vars, new_metas)

    new_rows = []
    for i, row in enumerate(table):
        content = row[column_name].value
        chunks = chunker(content)
        # For each chunk in the chunked data
        for j, chunk in enumerate(chunks):
            # Build new metas with previous data and the chunk
            new_metas_values = list(row.metas) + [chunk.text,
                                                  chunk.token_count,
                                                  j,  # Chunks index
                                                  chunk.start_index,
                                                  chunk.end_index,
                                                  ""]  # Metadata
            # Create the new row instance
            new_instance = Orange.data.Instance(new_domain,
                                                [row[x] for x in table.domain.attributes] + [row[y] for y in
                                                                                             table.domain.class_vars] + new_metas_values)
            # Store the new row
            new_rows.append(new_instance)

        if progress_callback is not None:
            progress_value = float(100 * (i + 1) / len(table))
            progress_callback(progress_value)
        if argself is not None:
            if argself.stop:
                break

    return Table.from_list(domain=new_domain, rows=new_rows)
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
# wordchunker_deprecated.py
|
|
2
|
+
# -----------------------------------------------------------------------------
|
|
3
|
+
# Compatibility shim reproducing chonkie==0.4.1 WordChunker behavior (including
|
|
4
|
+
# historical quirks/bugs), while being usable inside chonkie>=1.x pipelines.
|
|
5
|
+
#
|
|
6
|
+
# What you get:
|
|
7
|
+
# - A local WordChunker class with the same logic as chonkie 0.4.1 word chunker.
|
|
8
|
+
# - Preserves newlines: words are spans like r"(\s*\S+)" joined with "".
|
|
9
|
+
# - chunk_size / chunk_overlap are TOKEN budgets (not "word" counts), measured
|
|
10
|
+
# via the provided tokenizer backend.
|
|
11
|
+
# - Reproduces 0.4.1 overlap-loop quirk: iterates range(previous_chunk_length)
|
|
12
|
+
# where previous_chunk_length is a TOKEN count (not word count).
|
|
13
|
+
# - Reproduces 0.4.1 final-chunk quirk: _create_chunk called without current_index
|
|
14
|
+
# (defaults to 0), which can yield start_index via .find from the start.
|
|
15
|
+
#
|
|
16
|
+
# Added for practicality in your AAIT / chonkie>=1.5.2 integration:
|
|
17
|
+
# - WordChunker is callable: chunker(text) == chunker.chunk(text)
|
|
18
|
+
# - Accepts tokenizer="character" (character-count pseudo tokenizer)
|
|
19
|
+
# - Accepts a SentenceTransformer instance as tokenizer (uses .tokenizer underneath)
|
|
20
|
+
#
|
|
21
|
+
# NOTE (important bugfix vs earlier draft):
|
|
22
|
+
# Many HuggingFace tokenizers are *callable* (tokenizer(text) -> BatchEncoding).
|
|
23
|
+
# We must NOT mis-detect them as a generic callable token counter. We detect
|
|
24
|
+
# transformers/tokenizers backends *first* by attributes, then fall back to callable.
|
|
25
|
+
# -----------------------------------------------------------------------------
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import importlib
|
|
30
|
+
import inspect
|
|
31
|
+
import re
|
|
32
|
+
from dataclasses import dataclass
|
|
33
|
+
from typing import Any, Callable, List, Union
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(frozen=True)
class Chunk:
    """Immutable record describing one chunk produced by WordChunker."""

    # Chunk text (the chunk's words joined with "").
    text: str
    # Result of locating `text` in the source text with str.find().
    start_index: int
    # start_index + len(text).
    end_index: int
    # Sum of the per-word token counts of the words in the chunk.
    token_count: int
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# -----------------------------
|
|
45
|
+
# Helpers for robust tokenizers
|
|
46
|
+
# -----------------------------
|
|
47
|
+
|
|
48
|
+
def _unwrap_possible_sentence_transformer(obj: Any) -> Any:
|
|
49
|
+
"""
|
|
50
|
+
If `obj` looks like a SentenceTransformer, return its underlying HF tokenizer
|
|
51
|
+
when available. SentenceTransformer usually has `.tokenizer`.
|
|
52
|
+
"""
|
|
53
|
+
if obj is None:
|
|
54
|
+
return obj
|
|
55
|
+
try:
|
|
56
|
+
if hasattr(obj, "tokenizer") and "SentenceTransformer" in type(obj).__name__:
|
|
57
|
+
tok = getattr(obj, "tokenizer", None)
|
|
58
|
+
if tok is not None:
|
|
59
|
+
return tok
|
|
60
|
+
except Exception:
|
|
61
|
+
pass
|
|
62
|
+
return obj
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _make_character_tokenizer() -> Any:
|
|
66
|
+
"""
|
|
67
|
+
Special compatibility: when user passes tokenizer="character", count tokens
|
|
68
|
+
as characters, and support batch encoding.
|
|
69
|
+
"""
|
|
70
|
+
class _CharTokenizer:
|
|
71
|
+
def encode(self, text: str):
|
|
72
|
+
return list(range(len(text)))
|
|
73
|
+
|
|
74
|
+
def encode_batch(self, texts: List[str]):
|
|
75
|
+
return [self.encode(t) for t in texts]
|
|
76
|
+
|
|
77
|
+
return _CharTokenizer()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# -----------------------------
|
|
81
|
+
# Minimal BaseChunker (0.4.1-ish)
|
|
82
|
+
# -----------------------------
|
|
83
|
+
|
|
84
|
+
class BaseChunker:
    """
    Minimal subset of chonkie.chunker.base.BaseChunker needed by WordChunker 0.4.1

    Wraps "something that can count tokens" (a tokenizer name, a tokenizer
    object, or a plain `f(text) -> int` callable) and exposes:

    - `tokenizer`: the resolved tokenizer object / callable,
    - `_tokenizer_backend`: a string tag describing how to talk to it,
    - `token_counter`: a callable `text -> int`,
    - `_encode_batch(texts)`: per-text token-id lists (only their lengths matter).
    """

    def __init__(self, tokenizer_or_token_counter: Union[str, Any, Callable[[str], int]]):
        """
        Resolve the tokenizer and precompute its backend tag and token counter.

        Parameters:
            tokenizer_or_token_counter: "character" for the character-count
                pseudo tokenizer, any other string for a tokenizer name to
                load, or an already built tokenizer / token-counting callable.
                SentenceTransformer-like objects are replaced by their
                `.tokenizer`.

        Raises:
            ValueError: If the resolved object matches no supported backend.
        """
        tokenizer_or_token_counter = _unwrap_possible_sentence_transformer(tokenizer_or_token_counter)

        # Only compare with "character" once we know we hold a string, so that
        # arbitrary tokenizer objects never have their __eq__ invoked.
        if isinstance(tokenizer_or_token_counter, str):
            if tokenizer_or_token_counter == "character":
                self.tokenizer = _make_character_tokenizer()
            else:
                self.tokenizer = self._load_tokenizer(tokenizer_or_token_counter)
        else:
            self.tokenizer = tokenizer_or_token_counter

        self._tokenizer_backend = self._get_tokenizer_backend()
        self.token_counter = self._get_tokenizer_counter()

    def _get_tokenizer_backend(self) -> str:
        """
        Classify `self.tokenizer` into one of the supported backend tags.

        Returns:
            "transformers", "tokenizers", "tiktoken", "encode_batch",
            "encode_only" or "callable_counter".

        Raises:
            ValueError: If the tokenizer matches none of the checks.
        """
        t = self.tokenizer
        tname = type(t).__name__
        ttype = str(type(t))

        # 1) transformers-style tokenizer (callable, returns BatchEncoding, has encode, often batch_encode_plus)
        if hasattr(t, "batch_encode_plus") or "transformers" in ttype or "PreTrainedTokenizer" in tname:
            return "transformers"

        # 2) tokenizers rust-style (has encode_batch with add_special_tokens)
        if "tokenizers" in ttype:
            return "tokenizers"

        # 3) tiktoken encodings
        if "tiktoken" in ttype:
            return "tiktoken"

        # 4) our custom / other encoders that implement encode_batch
        if hasattr(t, "encode_batch"):
            return "encode_batch"

        # 5) basic encode-only objects
        if hasattr(t, "encode"):
            return "encode_only"

        # 6) finally: a callable token *counter* function: f(text)->int
        #    (must be LAST so we don't mis-detect HF tokenizers as callable counters)
        #    Functions and bound methods are callable, so callable() covers them.
        if callable(t):
            return "callable_counter"

        raise ValueError(f"Tokenizer backend {ttype} not supported")

    def _load_tokenizer(self, tokenizer_name: str):
        """
        Load a tokenizer by name.

        Same overall strategy as 0.4.1: try tiktoken -> autotiktokenizer ->
        tokenizers -> transformers. Any failure in one of the first three
        libraries (not installed, unknown name, ...) silently falls through to
        the next one; errors raised by transformers propagate to the caller.

        Raises:
            ValueError: If transformers is not installed and none of the
                earlier libraries produced a tokenizer.
        """
        # `import importlib` alone does not guarantee that the `importlib.util`
        # submodule is loaded; import it explicitly before using find_spec().
        import importlib.util

        def _from_tiktoken():
            from tiktoken import get_encoding
            return get_encoding(tokenizer_name)

        def _from_autotiktokenizer():
            from autotiktokenizer import AutoTikTokenizer
            return AutoTikTokenizer.from_pretrained(tokenizer_name)

        def _from_tokenizers():
            from tokenizers import Tokenizer
            return Tokenizer.from_pretrained(tokenizer_name)

        fallbacks = (
            ("tiktoken", _from_tiktoken),
            ("autotiktokenizer", _from_autotiktokenizer),
            ("tokenizers", _from_tokenizers),
        )
        for module_name, load in fallbacks:
            try:
                if importlib.util.find_spec(module_name) is not None:
                    return load()
            except Exception:
                # Deliberate: this library cannot provide the tokenizer, try the next one.
                continue

        if importlib.util.find_spec("transformers") is not None:
            from transformers import AutoTokenizer
            return AutoTokenizer.from_pretrained(tokenizer_name)
        raise ValueError(
            "Tokenizer not found in: transformers, tokenizers, autotiktokenizer, tiktoken. "
            "Install one of these."
        )

    def _get_tokenizer_counter(self) -> Callable[[str], int]:
        """
        Return a `text -> token count` callable matching the detected backend.

        Raises:
            ValueError: If `self._tokenizer_backend` is not a known tag.
        """
        t = self.tokenizer
        if self._tokenizer_backend == "transformers":
            return lambda text: len(t.encode(text, add_special_tokens=False))
        if self._tokenizer_backend == "tokenizers":
            return lambda text: len(t.encode(text, add_special_tokens=False).ids)
        if self._tokenizer_backend == "tiktoken":
            return lambda text: len(t.encode(text))
        if self._tokenizer_backend == "encode_batch":
            # Prefer the single-text encode() when the object has one.
            if hasattr(t, "encode"):
                return lambda text: len(t.encode(text))
            return lambda text: len(t.encode_batch([text])[0])
        if self._tokenizer_backend == "encode_only":
            return lambda text: len(t.encode(text))
        if self._tokenizer_backend == "callable_counter":
            return t  # type: ignore[return-value]
        raise ValueError("Tokenizer backend not supported for token counting")

    def _encode_batch(self, texts: List[str]) -> List[List[int]]:
        """
        Return list of token-id lists. Only lengths are used by WordChunker.

        Raises:
            ValueError: If `self._tokenizer_backend` is not a known tag.
        """
        t = self.tokenizer
        if self._tokenizer_backend == "transformers":
            # batch_encode_plus exists on most HF tokenizers; if not, fall back to __call__
            if hasattr(t, "batch_encode_plus"):
                return t.batch_encode_plus(texts, add_special_tokens=False)["input_ids"]
            # Fallback: tokenizer(texts, add_special_tokens=False) -> BatchEncoding with input_ids
            enc = t(texts, add_special_tokens=False)
            return enc["input_ids"]
        if self._tokenizer_backend == "tokenizers":
            return [e.ids for e in t.encode_batch(texts, add_special_tokens=False)]
        if self._tokenizer_backend in ("tiktoken", "encode_batch"):
            return t.encode_batch(texts)
        if self._tokenizer_backend == "encode_only":
            return [t.encode(x) for x in texts]
        if self._tokenizer_backend == "callable_counter":
            # emulate "ids" with dummy list of length == token_count
            return [list(range(int(t(x)))) for x in texts]
        raise ValueError(f"Tokenizer backend {self._tokenizer_backend} not supported.")
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# -----------------------------
|
|
212
|
+
# WordChunker (exact 0.4.1 logic)
|
|
213
|
+
# -----------------------------
|
|
214
|
+
|
|
215
|
+
class WordChunker(BaseChunker):
    """
    Exact port of chonkie==0.4.1 WordChunker (chunker/word.py), with identical behavior/quirks.

    Text is split into whitespace-prefixed "words"; words are accumulated into
    a chunk until adding the next one would exceed `chunk_size` tokens, and
    each new chunk starts with trailing words of the previous one, limited to
    `chunk_overlap` tokens.
    """

    def __init__(
        self,
        tokenizer: Union[str, Any] = "gpt2",
        chunk_size: int = 512,
        chunk_overlap: int = 128,
    ):
        """
        Parameters:
            tokenizer: Tokenizer name, tokenizer object or token counter,
                forwarded to BaseChunker.
            chunk_size: Maximum token budget of a chunk; must be > 0.
            chunk_overlap: Token budget reused from the previous chunk; must
                be < chunk_size.

        Raises:
            ValueError: If `chunk_size` <= 0 or `chunk_overlap` >= `chunk_size`.
        """
        super().__init__(tokenizer)

        if chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if chunk_overlap >= chunk_size:
            raise ValueError("chunk_overlap must be less than chunk_size")

        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def _split_into_words(self, text: str) -> List[str]:
        """Split `text` into pieces that each end right after a run of non-whitespace.

        Every piece keeps the whitespace that precedes it, and any trailing
        whitespace becomes a final piece, so "".join(pieces) == text.
        """
        split_points = [match.end() for match in re.finditer(r"(\s*\S+)", text)]
        words: List[str] = []
        prev = 0

        for point in split_points:
            words.append(text[prev:point])
            prev = point

        # Whatever follows the last match (trailing whitespace) is kept as its own piece.
        if prev < len(text):
            words.append(text[prev:])

        return words

    def _create_chunk(
        self,
        words: List[str],
        text: str,
        token_count: int,
        current_index: int = 0,
    ) -> Chunk:
        """Build a Chunk from `words`, locating it in `text` from `current_index` on.

        `start_index` is the result of `text.find(...)` and `end_index` is
        `start_index + len(chunk_text)`.
        """
        chunk_text = "".join(words)
        start_index = text.find(chunk_text, current_index)
        return Chunk(
            text=chunk_text,
            start_index=start_index,
            end_index=start_index + len(chunk_text),
            token_count=token_count,
        )

    def _get_word_list_token_counts(self, words: List[str]) -> List[int]:
        """Return the token count of every non-empty word, in order."""
        words = [word for word in words if word != ""]
        encodings = self._encode_batch(words)
        return [len(encoding) for encoding in encodings]

    def chunk(self, text: str) -> List[Chunk]:
        """Split `text` into overlapping chunks of at most `chunk_size` tokens.

        Returns an empty list for empty or whitespace-only text.
        """
        if not text or not text.strip():
            return []

        words = self._split_into_words(text)
        lengths = self._get_word_list_token_counts(words)
        chunks: List[Chunk] = []

        current_chunk: List[str] = []
        current_chunk_length = 0
        current_index = 0

        for i, (word, length) in enumerate(zip(words, lengths)):
            if current_chunk_length + length <= self.chunk_size:
                # The word still fits in the current chunk's token budget.
                current_chunk.append(word)
                current_chunk_length += length
            else:
                # Budget exceeded: emit the current chunk before starting a new one.
                chunk = self._create_chunk(current_chunk, text, current_chunk_length, current_index)
                chunks.append(chunk)

                previous_chunk_length = current_chunk_length
                current_index = chunk.end_index

                overlap: List[str] = []
                overlap_length = 0

                # Quirk/bug-compatible loop: previous_chunk_length is token count
                for j in range(0, previous_chunk_length):
                    # Walk backwards from the word just before the current one.
                    cwi = i - 1 - j
                    if cwi < 0:
                        break
                    oword = words[cwi]
                    olength = lengths[cwi]
                    if overlap_length + olength <= self.chunk_overlap:
                        overlap.append(oword)
                        overlap_length += olength
                    else:
                        break

                # Overlap words were collected backwards; restore text order.
                current_chunk = [w for w in reversed(overlap)]
                current_chunk_length = overlap_length

                current_chunk.append(word)
                current_chunk_length += length

        if current_chunk:
            # Quirk/bug-compatible: current_index not passed (defaults to 0)
            chunk = self._create_chunk(current_chunk, text, current_chunk_length)
            chunks.append(chunk)

        return chunks

    def __call__(self, text: str) -> List[Chunk]:
        """Make the chunker callable: `chunker(text)` is `chunker.chunk(text)`."""
        return self.chunk(text)

    def __repr__(self) -> str:
        return f"WordChunker(chunk_size={self.chunk_size}, chunk_overlap={self.chunk_overlap})"
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
def chunk_words(content: str, tokenizer: Any, chunk_size: int = 300, chunk_overlap: int = 100):
    """
    Chunk `content` with a WordChunker and return `(chunk_texts, [])`.

    The first element is the list of chunk texts in order; the second element
    is always an empty list.
    """
    pieces = WordChunker(
        tokenizer=tokenizer,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    ).chunk(content)

    texts = []
    for piece in pieces:
        texts.append(piece.text)
    return texts, []
|
|
@@ -1473,6 +1473,48 @@ def create_trigger_table():
|
|
|
1473
1473
|
return table
|
|
1474
1474
|
|
|
1475
1475
|
|
|
1476
|
+
def ensure_file_exists_recursive(path_holder: list[str]) -> bool:
    """
    Check that a file exists, searching below its expected folder if needed.

    - If the path in path_holder[0] is an existing file -> True
    - Otherwise, search recursively for a file with the same name under the
      path's parent directory
    - If found -> path_holder[0] is updated with the resolved path of the
      first match -> True
    - Otherwise -> False (path_holder is left untouched)

    Args:
        path_holder (list[str]): mutable list whose first item is the file path

    Returns:
        bool: True when the file exists (possibly at an updated location).
    """
    import glob  # local import: only needed to escape the file name below

    if not path_holder or not path_holder[0]:
        return False

    original_path = Path(path_holder[0])

    # 1) Simple case: the file is already where it is expected
    if original_path.is_file():
        return True

    parent_dir = original_path.parent
    filename = original_path.name

    # 2) Minimal safety: we need a name to look for and a folder to look in
    #    (an empty name would make rglob() raise ValueError)
    if not filename or not parent_dir.is_dir():
        return False

    # 3) Recursive search in the sub-folders. The name is escaped so that
    #    characters such as "[", "*" or "?" are matched literally instead of
    #    being interpreted as glob wildcards.
    for found in parent_dir.rglob(glob.escape(filename)):
        if found.is_file():
            path_holder[0] = str(found.resolve())
            return True

    # 4) Nothing found -> leave the path unchanged
    return False
|
|
1515
|
+
|
|
1516
|
+
|
|
1517
|
+
|
|
1476
1518
|
if __name__ == "__main__":
|
|
1477
1519
|
# avant faire un bouton
|
|
1478
1520
|
# set_aait_store_remote_ressources_path(ressource_path)
|
|
@@ -12,7 +12,7 @@ else:
|
|
|
12
12
|
from orangecontrib.AAIT.utils.import_uic import uic
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class BaseListWidget(widget.OWWidget):
|
|
15
|
+
class BaseListWidget(widget.OWWidget, openclass=True):
|
|
16
16
|
"""
|
|
17
17
|
Base Orange widget providing a filterable list of variables from the input data domain.
|
|
18
18
|
|
|
@@ -54,6 +54,14 @@ class BaseListWidget(widget.OWWidget):
|
|
|
54
54
|
self.var_selector.add_variables(self.data.domain)
|
|
55
55
|
self.var_selector.select_variable_by_name(self.selected_column_name)
|
|
56
56
|
```
|
|
57
|
+
7. You can use
|
|
58
|
+
autorun = Setting(True)
|
|
59
|
+
and self.autorun = True / self.autorun = False
|
|
60
|
+
to control whether the process is triggered automatically when input data is received.
|
|
61
|
+
|
|
62
|
+
8. self.data is the input data stream. Make sure to create
|
|
63
|
+
new_data = self.data.copy()
|
|
64
|
+
to avoid issues when the process is triggered several times via a push button.
|
|
57
65
|
"""
|
|
58
66
|
# Settings
|
|
59
67
|
selected_column_name = Setting("Default") # set the targeted column by default as "Default"
|
|
@@ -91,8 +99,8 @@ class BaseListWidget(widget.OWWidget):
|
|
|
91
99
|
|
|
92
100
|
# Data management
|
|
93
101
|
self.data = None
|
|
94
|
-
self.autorun
|
|
95
|
-
|
|
102
|
+
# If self.autorun exists, it is preserved. Otherwise, it is set to True.
|
|
103
|
+
self.autorun = getattr(self, "autorun", True)
|
|
96
104
|
|
|
97
105
|
def on_variable_selected(self, var_name):
|
|
98
106
|
"""Update the selected column when the user clicks an item."""
|