aait 2.3.15.994__tar.gz → 2.3.15.996__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aait-2.3.15.996/PKG-INFO +8 -0
- aait-2.3.15.996/aait.egg-info/PKG-INFO +8 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/aait.egg-info/SOURCES.txt +1 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/__init__.py +38 -1
- aait-2.3.15.996/orangecontrib/AAIT/llm/chunking.py +271 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/base_widget.py +1 -1
- aait-2.3.15.996/orangecontrib/AAIT/utils/tools/TigerODM_notepad.py +1 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWChunking.py +5 -6
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWConverseLLM.py +2 -1
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWLLMEngine.py +16 -6
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Embeddings.py +12 -4
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWSplitExcelSheets.py +1 -1
- {aait-2.3.15.994 → aait-2.3.15.996}/setup.py +1 -1
- aait-2.3.15.994/PKG-INFO +0 -32
- aait-2.3.15.994/aait.egg-info/PKG-INFO +0 -32
- aait-2.3.15.994/orangecontrib/AAIT/llm/chunking.py +0 -149
- {aait-2.3.15.994 → aait-2.3.15.996}/aait.egg-info/dependency_links.txt +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/aait.egg-info/entry_points.txt +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/aait.egg-info/namespace_packages.txt +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/aait.egg-info/requires.txt +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/aait.egg-info/top_level.txt +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/__init__.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/answers.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/answers_llama.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/embeddings.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/lemmes.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/lmstudio.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/process_documents.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/prompt_management.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/resources/__ini__.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/resources/markdown_recipe.json +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/llm/translations.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/optimiser/__init__.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/optimiser/optuna_multi.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/MetManagement.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/OperationSystem.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/SimpleDialogQt.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/__init__.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/aait_repo_file.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/aait_table_viewer.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/delta_local_shared_fodler.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/import_uic.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/initialize_from_ini.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/mac_utils.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/shared_functions.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/shared_variables.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/subprocess_management.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/thread_management.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/tools/__init__.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/tools/change_owcorpus.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/tools/first_time_check.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/tools/owcorpus_ok.txt +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/unlink_table_domain.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/widget_positioning.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/utils/windows_utils.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWAAITResourcesManager.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWAccumulator.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWAddColumns.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWApplyRules.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWAutoShowCreateInstance.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWAutoShowTable.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWCN2rule_view.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWConcatRules.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWCreateEmbeddings.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWDisplayMD.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWEditTable.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWEmptySwitch.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWEndLoop.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWExecuteScript.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWExtraChunks.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWExtractTokens.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWFileMetadata.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWFileSyncChecker.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWFileWithPath.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWFindFilesFromDir.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWFusionNM.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWGenerateQuestions.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWGenerateSynthesis.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWGenerateWord.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWGetPages.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWInputSelector.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWKeywords.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWKeywordsDetection.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWLMStudio.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWLanguageDetection.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWLargeLanguageModel.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWLemmatizer.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWLoadDocuments.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_CE_MiniLML6.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Falcon.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_HelsinkiEnFr.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_HelsinkiFrEn.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_MPNET.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Mistral.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Qwen.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Qwen1B5_Q6.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Qwen2_5_32B.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Qwen3B_Q4.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q4.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Qwen7B_Q6.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_Solar.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_SolarUncensored.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_EN.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWModel_SpacyMD_FR.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWOperationSystem.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWOptimisation.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWOptimisationSelection.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWOptimisationSendScore.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWProcessDocumentsFromPath.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWQuadrantclicker.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWQueryLLM.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWRandomData.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWReranking.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWSaveFilepathEntry.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWSelectColumnDynamique.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWSelectRowsDynamic.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWSortAndSelect.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWStartLoop.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWTable2Corpus.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWTranslation.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWTrigger.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/OWUnzipFolder.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/POW_Wfactory.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/__init__.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owMarkdown.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/ow_OperationSystem.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/ow_in_or_out_path.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/ow_widget_random_data.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owaccumulator.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owaddcolumns.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owapplyrules.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owchunking.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owconcatrules.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owconversellm.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owedgellm.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owembeddings.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owemptyswitch.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owendloop.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owexecutescript.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owexecutescript_TEST.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owextrachunks.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owextracttokens.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owfilemetadata.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owfilesyncchecker.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owfilewithpath.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owfindfilesfromdir.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owfusion_nm.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owgenerate_word.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owgeneratequestions.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owgeneratesynthesis.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owgetpages.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owkeyword.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owkeywordsdetection.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owlangdetect.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owlargelanguagemodel.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owlemmatizer.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owllm4all.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owloaddocuments.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owloadworkflow.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_ce_minilml6.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_embeddings.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_falcon.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_en_fr.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_helsinki_fr_en.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_mistral.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_mpnet.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_qwen.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_2.5_32b.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_1.5b_q6.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_3b_q4.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q4.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_qwen_instruct_7b_q6.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_qwencoder_7b.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_solar.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_solar_uncensored.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_en.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owmodel_spacymd_fr.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/ownumberpointinrules.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owprocessdocuments.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owquadrant_clicker.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owqueryllm.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owreranking.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owsavewithpath.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owselect_column_dynamic.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owselect_row_dynamic.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owsortandselect.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owsplitexcelsheets.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owstartloop.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owtable2corpus.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owtranslation.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owtrigger.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/designer/owunzipfolder.ui +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/CN2RuleViewer.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/CreateInstance.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/MDViewer.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/Mistral.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/Table.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/apply_rules.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/blue_down_arrow.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/book.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/category.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/dark_green.txt +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/de.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/document_generator.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/documents.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/edge_llm.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/endloop.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/extra_chunks.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/green_check.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/in_or_out.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/input.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/languages.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/lm_studio.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/local_interf_img_multi_pull.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/local_interf_multi_pull.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/local_interf_pull.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/local_interf_push.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/local_interf_text_pull.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/logo_solar.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/logo_uncensoredsolar.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/logo_upload.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/models.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/operationSystem.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/optimisation.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/optimizer.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/output.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owCN2_intersect_rules.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owaccumulator.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owchunking.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owconversellm.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owedittable.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owembeddings.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owemptyswitch.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owenvinfo.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owexecutescript.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owextracttokens.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owfilemetadata.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owfilesfromdir.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owfilesyncchecker.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owfilewithpath.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owfusion_nm.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owgeneratequestions.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owgeneratesynthesis.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owkeywords.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owkeywordsdetection.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owlargelanguagemodel.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owlemmatizer.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owloaddocuments.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owmodel_ce_minilml6.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owmodel_embeddings.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owmodel_falcon.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_en_fr.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owmodel_helsinki_fr_en.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owmodel_mpnet.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_en.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owmodel_spacymd_fr.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owoptimisation.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owoptimisationselection.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owqueryllm.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owreranking.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owsavefilepathentry.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owselectcolumndynamique.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owsortandselect.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owtable2corpus.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owtranslation.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/owtrigger.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/processdocuments.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/quadrantclicker.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/qwen-color.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/select_dynamic_row.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/splitexcelsheets.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/startloop.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/tools.png +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/widgetFactory.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/AAIT/widgets/icons/zip.svg +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/orangecontrib/__init__.py +0 -0
- {aait-2.3.15.994 → aait-2.3.15.996}/setup.cfg +0 -0
aait-2.3.15.996/PKG-INFO
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aait
|
|
3
|
+
Version: 2.3.15.996
|
|
4
|
+
Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
|
|
5
|
+
Home-page:
|
|
6
|
+
Author: Orange community
|
|
7
|
+
Author-email:
|
|
8
|
+
Keywords: orange3 add-on
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: aait
|
|
3
|
+
Version: 2.3.15.996
|
|
4
|
+
Summary: Advanced Artificial Intelligence Tools is a package meant to develop and enable advanced AI functionalities in Orange
|
|
5
|
+
Home-page:
|
|
6
|
+
Author: Orange community
|
|
7
|
+
Author-email:
|
|
8
|
+
Keywords: orange3 add-on
|
|
@@ -40,6 +40,7 @@ orangecontrib/AAIT/utils/thread_management.py
|
|
|
40
40
|
orangecontrib/AAIT/utils/unlink_table_domain.py
|
|
41
41
|
orangecontrib/AAIT/utils/widget_positioning.py
|
|
42
42
|
orangecontrib/AAIT/utils/windows_utils.py
|
|
43
|
+
orangecontrib/AAIT/utils/tools/TigerODM_notepad.py
|
|
43
44
|
orangecontrib/AAIT/utils/tools/__init__.py
|
|
44
45
|
orangecontrib/AAIT/utils/tools/change_owcorpus.py
|
|
45
46
|
orangecontrib/AAIT/utils/tools/concat_splitted_pypi.py
|
|
@@ -330,4 +330,41 @@ else: # Execute the file
|
|
|
330
330
|
import_proprietary_categories()
|
|
331
331
|
duplicate_widget_if_needed_exept_POW_file()
|
|
332
332
|
|
|
333
|
-
duplicate_POW_file()
|
|
333
|
+
duplicate_POW_file()
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
from AnyQt import QtWidgets, QtGui, QtCore
|
|
338
|
+
|
|
339
|
+
def force_native_light_mode():
    """Force the running Qt application to use a light colour palette.

    Looks up the current ``QApplication`` instance and, if one exists,
    switches it to the "WindowsVista" widget style (when that style can be
    created) and installs an explicitly light ``QPalette``.

    Does nothing when no ``QApplication`` has been created yet.

    Returns:
        None
    """
    # 1. Get the current application instance; without one there is
    #    nothing to restyle.
    app = QtWidgets.QApplication.instance()
    if app is None:
        return

    # 2. Force the style back to 'WindowsVista' so that no dark-specific
    #    custom style stays active.
    #    QStyleFactory.create() yields no style object when the requested key
    #    is not available (presumably the case on non-Windows platforms --
    #    TODO confirm), so only apply the style when one was really created
    #    instead of handing a null style to setStyle().
    native_style = QtWidgets.QStyleFactory.create("WindowsVista")
    if native_style is not None:
        app.setStyle(native_style)

    # 3. Build the palette from a default-constructed QPalette and re-assert
    #    every light-mode colour explicitly so that colours injected by the
    #    OS theme are overridden. Order matches the original call sequence.
    roles = QtGui.QPalette
    light_colors = (
        (roles.Window, QtGui.QColor(240, 240, 240)),
        (roles.WindowText, QtCore.Qt.black),
        (roles.Base, QtCore.Qt.white),
        (roles.AlternateBase, QtGui.QColor(233, 233, 233)),
        (roles.ToolTipBase, QtCore.Qt.white),
        (roles.ToolTipText, QtCore.Qt.black),
        (roles.Text, QtCore.Qt.black),
        (roles.Button, QtGui.QColor(240, 240, 240)),
        (roles.ButtonText, QtCore.Qt.black),
        (roles.BrightText, QtCore.Qt.red),
        (roles.Link, QtGui.QColor(0, 0, 255)),
        (roles.Highlight, QtGui.QColor(0, 120, 215)),
        (roles.HighlightedText, QtCore.Qt.white),
    )

    light_palette = QtGui.QPalette()
    for role, color in light_colors:
        light_palette.setColor(role, color)

    app.setPalette(light_palette)
|
|
369
|
+
|
|
370
|
+
force_native_light_mode()
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
import copy
|
|
2
|
+
import re
|
|
3
|
+
import os
|
|
4
|
+
import pathlib
|
|
5
|
+
import Orange
|
|
6
|
+
from Orange.data import Domain, Table, StringVariable, ContinuousVariable
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
### Chonkie
|
|
11
|
+
from importlib.metadata import version, PackageNotFoundError
|
|
12
|
+
from packaging.version import Version
|
|
13
|
+
try:
|
|
14
|
+
chonkie_version = Version(version("chonkie"))
|
|
15
|
+
except PackageNotFoundError:
|
|
16
|
+
chonkie_version = None
|
|
17
|
+
if chonkie_version is None:
|
|
18
|
+
raise RuntimeError("chonkie is not installed")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
elif chonkie_version >= Version("1.5.2"):
|
|
22
|
+
from chonkie import TokenChunker, SentenceChunker, RecursiveChunker, SemanticChunker, LateChunker
|
|
23
|
+
|
|
24
|
+
def create_chunks(table, column_name, tokenizer="character", chunk_size=300, chunk_overlap=100, mode="Token",
                  progress_callback=None, argself=None):
    """
    Chunk the text in `column_name` of an Orange Table using a chonkie chunker.

    Each input row is expanded into one output row per chunk produced for its
    text. Attributes, class values and existing metas are copied onto every
    output row, and six chunk-related meta columns are appended.

    Parameters:
        table (Table): Input data table.
        column_name (str): Name of the text column to chunk.
        tokenizer (str): Currently ignored: the value is overridden with
            "character" and a notice is printed on every call.
        chunk_size (int): Target chunk size. The "Markdown" mode does not use
            it and passes a fixed size of 400 instead.
        chunk_overlap (int): Overlap between chunks (only passed to the
            "Token" and "Sentence" chunkers).
        mode (str): Chunking strategy: "Token", "Sentence", "Recursive",
            "Markdown", "Semantic" or "Late".
        progress_callback (callable): Optional; called after each input row
            with the percentage (0-100) of rows processed so far.
        argself: Optional caller reference; when its `stop` attribute is
            truthy the loop ends early and the rows built so far are returned.

    Returns:
        Table: A new table whose metas are the input metas followed by
        "Chunks", "Chunks size", "Chunks index", "Chunks start", "Chunks end"
        and "Metadata" (the latter is always an empty string here).

    Raises:
        ValueError: If `mode` is not one of the supported strategies.
    """
    print("This widget is being updated : default tokenizer 'character' enabled for compatibility !!")
    # The caller-supplied tokenizer is deliberately overridden (see the notice above).
    tokenizer = "character"

    # Select the chunker according to the requested mode.
    if mode == "Token":
        chunker = TokenChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    elif mode == "Sentence":
        chunker = SentenceChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap,
                                  min_sentences_per_chunk=1)
    elif mode == "Recursive":
        chunker = RecursiveChunker(tokenizer=tokenizer, chunk_size=chunk_size, min_characters_per_chunk=24)

    # TODO: the three modes below still have to be tested after the version upgrade.
    # Beware of model handling: the tokenizer must be a SentenceTransformer
    # (Mpnet or Qwen-0.6B); this should be handled upstream in the widget.
    # Per-method settings also remain to be added.
    elif mode == "Markdown":
        # Built from a recipe file; dependencies should be checked first.
        current_dir = pathlib.Path(__file__).parent.resolve()
        # Join the components separately: a hard-coded backslash separator
        # only resolves on Windows.
        recipe_path = os.path.join(current_dir, "resources", "markdown_recipe.json")
        chunker = RecursiveChunker.from_recipe(path=recipe_path, tokenizer=tokenizer, chunk_size=400,
                                               min_characters_per_chunk=24)
    elif mode == "Semantic":
        chunker = SemanticChunker(embedding_model=tokenizer, threshold=0.7, chunk_size=chunk_size)
    elif mode == "Late":
        chunker = LateChunker(embedding_model=tokenizer, chunk_size=chunk_size, min_characters_per_chunk=24)
    else:
        raise ValueError(
            f"Invalid mode: {mode}. Valid modes are: Token, Sentence, Recursive, Markdown, Semantic, Late")

    attributes = table.domain.attributes
    class_vars = table.domain.class_vars
    new_metas = list(table.domain.metas) + [StringVariable("Chunks"), ContinuousVariable("Chunks size"),
                                            ContinuousVariable("Chunks index"), ContinuousVariable("Chunks start"),
                                            ContinuousVariable("Chunks end"), StringVariable("Metadata")]
    new_domain = Domain(attributes, class_vars, new_metas)

    total_rows = len(table)
    new_rows = []
    for i, row in enumerate(table):
        content = row[column_name].value
        # Values shared by every chunk row derived from this input row.
        base_values = [row[x] for x in attributes] + [row[y] for y in class_vars] + list(row.metas)
        for j, chunk in enumerate(chunker(content)):
            chunk_metas = [chunk.text, chunk.token_count, j, chunk.start_index, chunk.end_index, ""]
            new_rows.append(Orange.data.Instance(new_domain, base_values + chunk_metas))

        if progress_callback is not None:
            progress_callback(float(100 * (i + 1) / total_rows))
        # Cooperative cancellation requested by the caller.
        if argself is not None and argself.stop:
            break

    return Table.from_list(domain=new_domain, rows=new_rows)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
else: # chonkie_version == Version("0.4.1"):
|
|
110
|
+
from chonkie import TokenChunker, WordChunker, SentenceChunker
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def create_chunks(table, column_name, model, chunk_size=500, overlap=125, mode="words", progress_callback=None,
                  argself=None):
    """
    Chunk the text in `column_name` of an Orange Table (legacy chonkie API).

    Each input row is expanded into one output row per chunk produced for its
    text. Attributes, class values and existing metas are copied onto every
    output row, and three meta columns are appended.

    Parameters:
        table (Table): Input data table (only read, never modified).
        column_name (str): Name of the text column to chunk.
        model: Object exposing a `tokenizer` attribute, forwarded to the
            chunker. If it is None or a string, nothing is done.
        chunk_size (int): Target chunk size.
        overlap (int): Overlap between chunks.
        mode (str): Chunking strategy, matched case-insensitively:
            "Token", "Recursive" or "words" (all word-based), "Sentence",
            "markdown" or "semantic".
        progress_callback (callable): Optional; called after each input row
            with the percentage (0-100) of rows processed so far.
        argself: Optional caller reference; when its `stop` attribute is
            truthy the loop ends early and the rows built so far are returned.

    Returns:
        Table | None: A new table whose metas are the input metas followed by
        "Chunks", "Chunks index" and "Metadata"; None when `model` is None or
        a string.

    Raises:
        ValueError: If `mode` is not one of the supported strategies.
    """
    if model is None or isinstance(model, str):
        return

    # Match mode names case-insensitively and accept the default "words",
    # which the previous exact-match dispatch rejected with a ValueError.
    mode_key = mode.casefold() if isinstance(mode, str) else mode
    if mode_key in ("token", "recursive", "words"):
        chunk_function = chunk_words
    elif mode_key == "sentence":
        chunk_function = chunk_sentences
    elif mode_key == "semantic":
        chunk_function = chunk_semantic
    elif mode_key == "markdown":
        chunk_function = chunk_markdown
    else:
        raise ValueError(
            f"Invalid mode: {mode}. Valid modes are: 'Token', 'Recursive', 'words', 'Sentence', 'markdown', "
            f"'semantic'")

    # The input table is only read, so no defensive deep copy is needed.
    attributes = table.domain.attributes
    class_vars = table.domain.class_vars
    new_metas = list(table.domain.metas) + [StringVariable("Chunks"), ContinuousVariable("Chunks index"),
                                            StringVariable("Metadata")]
    new_domain = Domain(attributes, class_vars, new_metas)

    total_rows = len(table)
    new_rows = []
    for i, row in enumerate(table):
        content = row[column_name].value
        chunks, metadatas = chunk_function(content, tokenizer=model.tokenizer, chunk_size=chunk_size,
                                           chunk_overlap=overlap)
        # Values shared by every chunk row derived from this input row.
        base_values = [row[x] for x in attributes] + [row[y] for y in class_vars] + list(row.metas)
        for j, chunk in enumerate(chunks):
            # Chunkers that produce no metadata return an empty list.
            metadata = metadatas[j] if metadatas else ""
            new_rows.append(Orange.data.Instance(new_domain, base_values + [chunk, j, metadata]))

        if progress_callback is not None:
            progress_callback(float(100 * (i + 1) / total_rows))
        # Cooperative cancellation requested by the caller.
        if argself is not None and argself.stop:
            break

    return Table.from_list(domain=new_domain, rows=new_rows)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def chunk_tokens(content, tokenizer, chunk_size=512, chunk_overlap=128):
    """
    Split `content` with a chonkie TokenChunker.

    Returns a `(texts, metadatas)` tuple where `texts` holds the text of each
    chunk and `metadatas` is always an empty list.
    """
    splitter = TokenChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = [piece.text for piece in splitter.chunk(content)]
    # This chunker produces no metadata.
    return texts, []
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def chunk_words(content, tokenizer, chunk_size=300, chunk_overlap=100):
    """
    Split `content` with a chonkie WordChunker.

    Returns a `(texts, metadatas)` tuple where `texts` holds the text of each
    chunk and `metadatas` is always an empty list.
    """
    splitter = WordChunker(tokenizer=tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
    texts = [piece.text for piece in splitter.chunk(content)]
    # This chunker produces no metadata.
    return texts, []
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def chunk_sentences(content, tokenizer, chunk_size=500, chunk_overlap=125):
    """
    Split `content` with a chonkie SentenceChunker (at least one sentence per chunk).

    Returns a `(texts, metadatas)` tuple where `texts` holds the text of each
    chunk and `metadatas` is always an empty list.
    """
    splitter = SentenceChunker(
        tokenizer=tokenizer,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        min_sentences_per_chunk=1,
    )
    texts = [piece.text for piece in splitter.chunk(content)]
    # This chunker produces no metadata.
    return texts, []
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def chunk_markdown(content, tokenizer=None, chunk_size=500, chunk_overlap=125):
    """
    Split Markdown content into chunks tagged with their heading hierarchy.

    - If Markdown headings (#, ##, ... up to ######) are present, the body of
      each section is chunked separately with chunk_words() and every chunk is
      tagged with the titles of the current heading branch only.
    - Text located before the first heading is chunked as well, with empty
      metadata, instead of being dropped.
    - Sections whose body is empty produce no chunk.
    - Without any heading, the whole content is delegated to chunk_words().

    Parameters
    ----------
    content : str
        The content (Markdown or plain text). Anything falsy or not a string
        yields two empty lists.
    tokenizer : any
        Tokenizer forwarded to chunk_words().
    chunk_size : int
        Chunk size forwarded to chunk_words().
    chunk_overlap : int
        Overlap between consecutive chunks, forwarded to chunk_words().

    Returns
    -------
    (chunks, metadatas) : tuple(list[str], list[str])
        chunks : text segments
        metadatas : one entry per chunk, the heading titles of its branch
        joined with " ; " (empty string when the chunk is under no heading).
    """
    if not content or not isinstance(content, str):
        return [], []

    # Only horizontal whitespace may separate the hashes from the title;
    # otherwise a bare "#" line would take the following line as its title.
    header_regex = re.compile(r"^(#{1,6})[^\S\r\n]+(.*)", re.MULTILINE)
    matches = list(header_regex.finditer(content))

    # No heading at all: chunk the whole text directly.
    if not matches:
        chunks, _ = chunk_words(content, tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        return chunks, [""] * len(chunks)

    # (metadata, body) pairs in document order.
    sections = []

    # Keep the text that precedes the first heading.
    preamble = content[:matches[0].start()].strip()
    if preamble:
        sections.append(("", preamble))

    current_titles = {}
    for i, match in enumerate(matches):
        level = len(match.group(1))
        title = match.group(2).strip()
        body_end = matches[i + 1].start() if i + 1 < len(matches) else len(content)
        body = content[match.end():body_end].strip()

        # A new heading closes every branch of the same or a deeper level.
        current_titles = {lvl: name for lvl, name in current_titles.items() if lvl < level}
        current_titles[level] = title
        metadata = " ; ".join(current_titles[lvl] for lvl in sorted(current_titles))

        # A heading without text only contributes to the hierarchy.
        if body:
            sections.append((metadata, body))

    chunks, metadatas = [], []
    for metadata, body in sections:
        # Delegate the splitting of each section body to chunk_words.
        body_chunks, _ = chunk_words(body, tokenizer, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        chunks.extend(body_chunks)
        metadatas.extend([metadata] * len(body_chunks))

    return chunks, metadatas
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def chunk_semantic(content=None, tokenizer=None, chunk_size=500, chunk_overlap=125):
    """
    Placeholder for semantic chunking, which is not implemented.

    The parameters mirror the other chunk_* helpers so that a call written
    as `chunk_function(content, tokenizer=..., chunk_size=...,
    chunk_overlap=...)` fails with an explicit error instead of an
    argument-count TypeError.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("Semantic chunking is not implemented for this chonkie version")
|
|
@@ -12,7 +12,7 @@ else:
|
|
|
12
12
|
from orangecontrib.AAIT.utils.import_uic import uic
|
|
13
13
|
|
|
14
14
|
|
|
15
|
-
class BaseListWidget(widget.OWWidget):
|
|
15
|
+
class BaseListWidget(widget.OWWidget, openclass=True):
|
|
16
16
|
"""
|
|
17
17
|
Base Orange widget providing a filterable list of variables from the input data domain.
|
|
18
18
|
|