lionagi 0.3.8__py3-none-any.whl → 0.5.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- lionagi/__init__.py +14 -46
- lionagi/core/__init__.py +3 -1
- lionagi/core/_class_registry.py +69 -0
- lionagi/core/action/__init__.py +3 -13
- lionagi/core/action/action_manager.py +287 -0
- lionagi/core/action/base.py +109 -0
- lionagi/core/action/function_calling.py +127 -92
- lionagi/core/action/tool.py +172 -70
- lionagi/core/action/types.py +16 -0
- lionagi/core/communication/__init__.py +3 -0
- lionagi/core/communication/action_request.py +163 -0
- lionagi/core/communication/action_response.py +149 -0
- lionagi/core/communication/assistant_response.py +161 -0
- lionagi/core/communication/base_mail.py +49 -0
- lionagi/core/communication/instruction.py +376 -0
- lionagi/core/communication/message.py +286 -0
- lionagi/core/communication/message_manager.py +530 -0
- lionagi/core/communication/system.py +116 -0
- lionagi/core/communication/templates/README.md +28 -0
- lionagi/core/communication/templates/action_request.jinja2 +5 -0
- lionagi/core/communication/templates/action_response.jinja2 +9 -0
- lionagi/core/communication/templates/assistant_response.jinja2 +2 -0
- lionagi/core/communication/templates/instruction_message.jinja2 +61 -0
- lionagi/core/communication/templates/system_message.jinja2 +11 -0
- lionagi/core/communication/templates/tool_schemas.jinja2 +7 -0
- lionagi/core/communication/types.py +27 -0
- lionagi/core/communication/utils.py +254 -0
- lionagi/core/forms/__init__.py +3 -0
- lionagi/core/forms/base.py +232 -0
- lionagi/core/forms/form.py +791 -0
- lionagi/core/forms/report.py +321 -0
- lionagi/core/forms/types.py +13 -0
- lionagi/core/forms/utils.py +26 -0
- lionagi/core/generic/__init__.py +3 -6
- lionagi/core/generic/component.py +422 -0
- lionagi/core/generic/edge.py +143 -101
- lionagi/core/generic/element.py +195 -0
- lionagi/core/generic/graph.py +297 -180
- lionagi/core/generic/log.py +151 -0
- lionagi/core/generic/log_manager.py +320 -0
- lionagi/core/generic/node.py +7 -229
- lionagi/core/generic/pile.py +1017 -0
- lionagi/core/generic/progression.py +388 -0
- lionagi/core/generic/types.py +23 -0
- lionagi/core/generic/utils.py +50 -0
- lionagi/core/models/__init__.py +5 -0
- lionagi/core/models/base.py +85 -0
- lionagi/core/models/field_model.py +122 -0
- lionagi/core/models/new_model_params.py +195 -0
- lionagi/core/models/note.py +351 -0
- lionagi/core/models/operable_model.py +392 -0
- lionagi/core/models/schema_model.py +50 -0
- lionagi/core/models/types.py +10 -0
- lionagi/core/session/__init__.py +3 -0
- lionagi/core/session/branch.py +115 -415
- lionagi/core/session/branch_mixins.py +507 -0
- lionagi/core/session/session.py +122 -257
- lionagi/core/session/types.py +8 -0
- lionagi/core/typing/__init__.py +9 -0
- lionagi/core/typing/concepts.py +132 -0
- lionagi/core/typing/config.py +15 -0
- lionagi/core/typing/id.py +221 -0
- lionagi/core/typing/pydantic_.py +33 -0
- lionagi/core/typing/typing_.py +54 -0
- lionagi/integrations/__init__.py +0 -1
- lionagi/integrations/anthropic_/AnthropicModel.py +268 -0
- lionagi/integrations/anthropic_/AnthropicService.py +113 -0
- lionagi/integrations/anthropic_/__init__.py +3 -0
- lionagi/integrations/anthropic_/anthropic_max_output_token_data.yaml +7 -0
- lionagi/integrations/anthropic_/anthropic_price_data.yaml +14 -0
- lionagi/integrations/anthropic_/api_endpoints/__init__.py +3 -0
- lionagi/integrations/anthropic_/api_endpoints/api_request.py +277 -0
- lionagi/integrations/anthropic_/api_endpoints/data_models.py +40 -0
- lionagi/integrations/anthropic_/api_endpoints/match_response.py +119 -0
- lionagi/integrations/anthropic_/api_endpoints/messages/__init__.py +3 -0
- lionagi/integrations/anthropic_/api_endpoints/messages/request/__init__.py +3 -0
- lionagi/integrations/anthropic_/api_endpoints/messages/request/message_models.py +14 -0
- lionagi/integrations/anthropic_/api_endpoints/messages/request/request_body.py +74 -0
- lionagi/integrations/anthropic_/api_endpoints/messages/response/content_models.py +32 -0
- lionagi/integrations/anthropic_/api_endpoints/messages/response/response_body.py +101 -0
- lionagi/integrations/anthropic_/api_endpoints/messages/response/usage_models.py +25 -0
- lionagi/integrations/anthropic_/version.py +5 -0
- lionagi/integrations/groq_/GroqModel.py +318 -0
- lionagi/integrations/groq_/GroqService.py +147 -0
- lionagi/integrations/groq_/__init__.py +3 -0
- lionagi/integrations/groq_/api_endpoints/data_models.py +187 -0
- lionagi/integrations/groq_/api_endpoints/groq_request.py +288 -0
- lionagi/integrations/groq_/api_endpoints/match_response.py +106 -0
- lionagi/integrations/groq_/api_endpoints/response_utils.py +105 -0
- lionagi/integrations/groq_/groq_max_output_token_data.yaml +21 -0
- lionagi/integrations/groq_/groq_price_data.yaml +58 -0
- lionagi/integrations/groq_/groq_rate_limits.yaml +105 -0
- lionagi/integrations/groq_/version.py +5 -0
- lionagi/integrations/litellm_/__init__.py +3 -0
- lionagi/integrations/litellm_/imodel.py +69 -0
- lionagi/integrations/ollama_/OllamaModel.py +244 -0
- lionagi/integrations/ollama_/OllamaService.py +138 -0
- lionagi/integrations/ollama_/__init__.py +3 -0
- lionagi/integrations/ollama_/api_endpoints/__init__.py +3 -0
- lionagi/integrations/ollama_/api_endpoints/api_request.py +179 -0
- lionagi/integrations/ollama_/api_endpoints/chat_completion/__init__.py +3 -0
- lionagi/integrations/ollama_/api_endpoints/chat_completion/message_models.py +31 -0
- lionagi/integrations/ollama_/api_endpoints/chat_completion/request_body.py +46 -0
- lionagi/integrations/ollama_/api_endpoints/chat_completion/response_body.py +67 -0
- lionagi/integrations/ollama_/api_endpoints/chat_completion/tool_models.py +49 -0
- lionagi/integrations/ollama_/api_endpoints/completion/request_body.py +72 -0
- lionagi/integrations/ollama_/api_endpoints/completion/response_body.py +59 -0
- lionagi/integrations/ollama_/api_endpoints/data_models.py +15 -0
- lionagi/integrations/ollama_/api_endpoints/embedding/request_body.py +33 -0
- lionagi/integrations/ollama_/api_endpoints/embedding/response_body.py +29 -0
- lionagi/integrations/ollama_/api_endpoints/match_data_model.py +62 -0
- lionagi/integrations/ollama_/api_endpoints/match_response.py +190 -0
- lionagi/integrations/ollama_/api_endpoints/model/__init__.py +3 -0
- lionagi/integrations/ollama_/api_endpoints/model/copy_model.py +13 -0
- lionagi/integrations/ollama_/api_endpoints/model/create_model.py +28 -0
- lionagi/integrations/ollama_/api_endpoints/model/delete_model.py +11 -0
- lionagi/integrations/ollama_/api_endpoints/model/list_model.py +60 -0
- lionagi/integrations/ollama_/api_endpoints/model/pull_model.py +34 -0
- lionagi/integrations/ollama_/api_endpoints/model/push_model.py +35 -0
- lionagi/integrations/ollama_/api_endpoints/model/show_model.py +36 -0
- lionagi/integrations/ollama_/api_endpoints/option_models.py +68 -0
- lionagi/integrations/openai_/OpenAIModel.py +414 -0
- lionagi/integrations/openai_/OpenAIService.py +426 -0
- lionagi/integrations/openai_/api_endpoints/__init__.py +3 -0
- lionagi/integrations/openai_/api_endpoints/api_request.py +277 -0
- lionagi/integrations/openai_/api_endpoints/audio/__init__.py +9 -0
- lionagi/integrations/openai_/api_endpoints/audio/speech_models.py +34 -0
- lionagi/integrations/openai_/api_endpoints/audio/transcription_models.py +136 -0
- lionagi/integrations/openai_/api_endpoints/audio/translation_models.py +41 -0
- lionagi/integrations/openai_/api_endpoints/audio/types.py +41 -0
- lionagi/integrations/openai_/api_endpoints/batch/__init__.py +17 -0
- lionagi/integrations/openai_/api_endpoints/batch/batch_models.py +146 -0
- lionagi/integrations/openai_/api_endpoints/batch/cancel_batch.py +7 -0
- lionagi/integrations/openai_/api_endpoints/batch/create_batch.py +26 -0
- lionagi/integrations/openai_/api_endpoints/batch/list_batch.py +37 -0
- lionagi/integrations/openai_/api_endpoints/batch/request_object_models.py +65 -0
- lionagi/integrations/openai_/api_endpoints/batch/retrieve_batch.py +7 -0
- lionagi/integrations/openai_/api_endpoints/batch/types.py +4 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/__init__.py +1 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/request/__init__.py +39 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/request/message_models.py +121 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/request/request_body.py +221 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/request/response_format.py +71 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/request/stream_options.py +14 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/request/tool_choice_models.py +17 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/request/tool_models.py +54 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/request/types.py +18 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/response/choice_models.py +62 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/response/function_models.py +16 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/response/log_prob_models.py +47 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/response/message_models.py +25 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/response/response_body.py +99 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/response/types.py +8 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/response/usage_models.py +24 -0
- lionagi/integrations/openai_/api_endpoints/chat_completions/util.py +46 -0
- lionagi/integrations/openai_/api_endpoints/data_models.py +23 -0
- lionagi/integrations/openai_/api_endpoints/embeddings/__init__.py +3 -0
- lionagi/integrations/openai_/api_endpoints/embeddings/request_body.py +79 -0
- lionagi/integrations/openai_/api_endpoints/embeddings/response_body.py +67 -0
- lionagi/integrations/openai_/api_endpoints/files/__init__.py +11 -0
- lionagi/integrations/openai_/api_endpoints/files/delete_file.py +20 -0
- lionagi/integrations/openai_/api_endpoints/files/file_models.py +56 -0
- lionagi/integrations/openai_/api_endpoints/files/list_files.py +27 -0
- lionagi/integrations/openai_/api_endpoints/files/retrieve_file.py +9 -0
- lionagi/integrations/openai_/api_endpoints/files/upload_file.py +38 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/__init__.py +37 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/cancel_jobs.py +9 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/create_jobs.py +133 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/fine_tuning_job_checkpoint_models.py +58 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/fine_tuning_job_event_models.py +31 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/fine_tuning_job_models.py +140 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/list_fine_tuning_checkpoints.py +51 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/list_fine_tuning_events.py +42 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/list_fine_tuning_jobs.py +31 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/retrieve_jobs.py +9 -0
- lionagi/integrations/openai_/api_endpoints/fine_tuning/training_format.py +30 -0
- lionagi/integrations/openai_/api_endpoints/images/__init__.py +9 -0
- lionagi/integrations/openai_/api_endpoints/images/image_edit_models.py +69 -0
- lionagi/integrations/openai_/api_endpoints/images/image_models.py +56 -0
- lionagi/integrations/openai_/api_endpoints/images/image_variation_models.py +56 -0
- lionagi/integrations/openai_/api_endpoints/images/response_body.py +30 -0
- lionagi/integrations/openai_/api_endpoints/match_data_model.py +197 -0
- lionagi/integrations/openai_/api_endpoints/match_response.py +336 -0
- lionagi/integrations/openai_/api_endpoints/models/__init__.py +7 -0
- lionagi/integrations/openai_/api_endpoints/models/delete_fine_tuned_model.py +17 -0
- lionagi/integrations/openai_/api_endpoints/models/models_models.py +31 -0
- lionagi/integrations/openai_/api_endpoints/models/retrieve_model.py +9 -0
- lionagi/integrations/openai_/api_endpoints/moderations/__init__.py +3 -0
- lionagi/integrations/openai_/api_endpoints/moderations/request_body.py +20 -0
- lionagi/integrations/openai_/api_endpoints/moderations/response_body.py +139 -0
- lionagi/integrations/openai_/api_endpoints/uploads/__init__.py +19 -0
- lionagi/integrations/openai_/api_endpoints/uploads/add_upload_part.py +11 -0
- lionagi/integrations/openai_/api_endpoints/uploads/cancel_upload.py +7 -0
- lionagi/integrations/openai_/api_endpoints/uploads/complete_upload.py +18 -0
- lionagi/integrations/openai_/api_endpoints/uploads/create_upload.py +17 -0
- lionagi/integrations/openai_/api_endpoints/uploads/uploads_models.py +52 -0
- lionagi/integrations/openai_/image_token_calculator/image_token_calculator.py +92 -0
- lionagi/integrations/openai_/image_token_calculator/openai_image_token_data.yaml +15 -0
- lionagi/integrations/openai_/openai_max_output_token_data.yaml +12 -0
- lionagi/integrations/openai_/openai_price_data.yaml +26 -0
- lionagi/integrations/openai_/version.py +1 -0
- lionagi/integrations/pandas_/__init__.py +24 -0
- lionagi/integrations/pandas_/extend_df.py +61 -0
- lionagi/integrations/pandas_/read.py +103 -0
- lionagi/integrations/pandas_/remove_rows.py +61 -0
- lionagi/integrations/pandas_/replace_keywords.py +65 -0
- lionagi/integrations/pandas_/save.py +131 -0
- lionagi/integrations/pandas_/search_keywords.py +69 -0
- lionagi/integrations/pandas_/to_df.py +196 -0
- lionagi/integrations/pandas_/update_cells.py +54 -0
- lionagi/integrations/perplexity_/PerplexityModel.py +269 -0
- lionagi/integrations/perplexity_/PerplexityService.py +109 -0
- lionagi/integrations/perplexity_/__init__.py +3 -0
- lionagi/integrations/perplexity_/api_endpoints/api_request.py +171 -0
- lionagi/integrations/perplexity_/api_endpoints/chat_completions/request/request_body.py +121 -0
- lionagi/integrations/perplexity_/api_endpoints/chat_completions/response/response_body.py +146 -0
- lionagi/integrations/perplexity_/api_endpoints/data_models.py +63 -0
- lionagi/integrations/perplexity_/api_endpoints/match_response.py +26 -0
- lionagi/integrations/perplexity_/perplexity_max_output_token_data.yaml +3 -0
- lionagi/integrations/perplexity_/perplexity_price_data.yaml +10 -0
- lionagi/integrations/perplexity_/version.py +1 -0
- lionagi/integrations/pydantic_/__init__.py +8 -0
- lionagi/integrations/pydantic_/break_down_annotation.py +81 -0
- lionagi/integrations/pydantic_/new_model.py +208 -0
- lionagi/integrations/services.py +17 -0
- lionagi/libs/__init__.py +0 -55
- lionagi/libs/compress/models.py +62 -0
- lionagi/libs/compress/utils.py +81 -0
- lionagi/libs/constants.py +98 -0
- lionagi/libs/file/chunk.py +265 -0
- lionagi/libs/file/file_ops.py +114 -0
- lionagi/libs/file/params.py +212 -0
- lionagi/libs/file/path.py +301 -0
- lionagi/libs/file/process.py +139 -0
- lionagi/libs/file/save.py +90 -0
- lionagi/libs/file/types.py +22 -0
- lionagi/libs/func/async_calls/__init__.py +21 -0
- lionagi/libs/func/async_calls/alcall.py +157 -0
- lionagi/libs/func/async_calls/bcall.py +82 -0
- lionagi/libs/func/async_calls/mcall.py +134 -0
- lionagi/libs/func/async_calls/pcall.py +149 -0
- lionagi/libs/func/async_calls/rcall.py +185 -0
- lionagi/libs/func/async_calls/tcall.py +114 -0
- lionagi/libs/func/async_calls/ucall.py +85 -0
- lionagi/libs/func/decorators.py +277 -0
- lionagi/libs/func/lcall.py +57 -0
- lionagi/libs/func/params.py +64 -0
- lionagi/libs/func/throttle.py +119 -0
- lionagi/libs/func/types.py +39 -0
- lionagi/libs/func/utils.py +96 -0
- lionagi/libs/package/imports.py +162 -0
- lionagi/libs/package/management.py +58 -0
- lionagi/libs/package/params.py +26 -0
- lionagi/libs/package/system.py +18 -0
- lionagi/libs/package/types.py +26 -0
- lionagi/libs/parse/__init__.py +1 -0
- lionagi/libs/parse/flatten/__init__.py +9 -0
- lionagi/libs/parse/flatten/flatten.py +168 -0
- lionagi/libs/parse/flatten/params.py +52 -0
- lionagi/libs/parse/flatten/unflatten.py +79 -0
- lionagi/libs/parse/json/__init__.py +27 -0
- lionagi/libs/parse/json/as_readable.py +104 -0
- lionagi/libs/parse/json/extract.py +102 -0
- lionagi/libs/parse/json/parse.py +179 -0
- lionagi/libs/parse/json/schema.py +227 -0
- lionagi/libs/parse/json/to_json.py +71 -0
- lionagi/libs/parse/nested/__init__.py +33 -0
- lionagi/libs/parse/nested/nfilter.py +55 -0
- lionagi/libs/parse/nested/nget.py +40 -0
- lionagi/libs/parse/nested/ninsert.py +103 -0
- lionagi/libs/parse/nested/nmerge.py +155 -0
- lionagi/libs/parse/nested/npop.py +66 -0
- lionagi/libs/parse/nested/nset.py +89 -0
- lionagi/libs/parse/nested/to_flat_list.py +64 -0
- lionagi/libs/parse/nested/utils.py +185 -0
- lionagi/libs/parse/string_parse/__init__.py +11 -0
- lionagi/libs/parse/string_parse/code_block.py +73 -0
- lionagi/libs/parse/string_parse/docstring.py +179 -0
- lionagi/libs/parse/string_parse/function_.py +92 -0
- lionagi/libs/parse/type_convert/__init__.py +19 -0
- lionagi/libs/parse/type_convert/params.py +145 -0
- lionagi/libs/parse/type_convert/to_dict.py +333 -0
- lionagi/libs/parse/type_convert/to_list.py +186 -0
- lionagi/libs/parse/type_convert/to_num.py +358 -0
- lionagi/libs/parse/type_convert/to_str.py +195 -0
- lionagi/libs/parse/types.py +9 -0
- lionagi/libs/parse/validate/__init__.py +14 -0
- lionagi/libs/parse/validate/boolean.py +96 -0
- lionagi/libs/parse/validate/keys.py +150 -0
- lionagi/libs/parse/validate/mapping.py +109 -0
- lionagi/libs/parse/validate/params.py +62 -0
- lionagi/libs/parse/xml/__init__.py +10 -0
- lionagi/libs/parse/xml/convert.py +56 -0
- lionagi/libs/parse/xml/parser.py +93 -0
- lionagi/libs/string_similarity/__init__.py +32 -0
- lionagi/libs/string_similarity/algorithms.py +219 -0
- lionagi/libs/string_similarity/matcher.py +102 -0
- lionagi/libs/string_similarity/utils.py +15 -0
- lionagi/libs/utils.py +255 -0
- lionagi/operations/__init__.py +3 -6
- lionagi/operations/brainstorm/__init__.py +3 -0
- lionagi/operations/brainstorm/brainstorm.py +204 -0
- lionagi/operations/brainstorm/prompt.py +1 -0
- lionagi/operations/plan/__init__.py +3 -0
- lionagi/operations/plan/plan.py +172 -0
- lionagi/operations/plan/prompt.py +21 -0
- lionagi/operations/select/__init__.py +3 -0
- lionagi/operations/select/prompt.py +1 -0
- lionagi/operations/select/select.py +100 -0
- lionagi/operations/select/utils.py +107 -0
- lionagi/operations/utils.py +35 -0
- lionagi/protocols/adapters/adapter.py +79 -0
- lionagi/protocols/adapters/json_adapter.py +43 -0
- lionagi/protocols/adapters/pandas_adapter.py +96 -0
- lionagi/protocols/configs/__init__.py +15 -0
- lionagi/protocols/configs/branch_config.py +86 -0
- lionagi/protocols/configs/id_config.py +15 -0
- lionagi/protocols/configs/imodel_config.py +73 -0
- lionagi/protocols/configs/log_config.py +93 -0
- lionagi/protocols/configs/retry_config.py +29 -0
- lionagi/protocols/operatives/__init__.py +15 -0
- lionagi/protocols/operatives/action.py +181 -0
- lionagi/protocols/operatives/instruct.py +196 -0
- lionagi/protocols/operatives/operative.py +182 -0
- lionagi/protocols/operatives/prompts.py +232 -0
- lionagi/protocols/operatives/reason.py +56 -0
- lionagi/protocols/operatives/step.py +217 -0
- lionagi/protocols/registries/_component_registry.py +19 -0
- lionagi/protocols/registries/_pile_registry.py +26 -0
- lionagi/service/__init__.py +13 -0
- lionagi/service/complete_request_info.py +11 -0
- lionagi/service/imodel.py +110 -0
- lionagi/service/rate_limiter.py +108 -0
- lionagi/service/service.py +37 -0
- lionagi/service/service_match_util.py +131 -0
- lionagi/service/service_util.py +72 -0
- lionagi/service/token_calculator.py +51 -0
- lionagi/settings.py +136 -0
- lionagi/strategies/base.py +53 -0
- lionagi/strategies/concurrent.py +71 -0
- lionagi/strategies/concurrent_chunk.py +43 -0
- lionagi/strategies/concurrent_sequential_chunk.py +104 -0
- lionagi/strategies/params.py +128 -0
- lionagi/strategies/sequential.py +23 -0
- lionagi/strategies/sequential_chunk.py +89 -0
- lionagi/strategies/sequential_concurrent_chunk.py +100 -0
- lionagi/strategies/types.py +21 -0
- lionagi/strategies/utils.py +49 -0
- lionagi/version.py +1 -1
- lionagi-0.5.0.dist-info/METADATA +348 -0
- lionagi-0.5.0.dist-info/RECORD +373 -0
- {lionagi-0.3.8.dist-info → lionagi-0.5.0.dist-info}/WHEEL +1 -1
- lionagi/core/_setting/_setting.py +0 -59
- lionagi/core/action/README.md +0 -20
- lionagi/core/action/manual.py +0 -1
- lionagi/core/action/node.py +0 -94
- lionagi/core/action/tool_manager.py +0 -342
- lionagi/core/agent/README.md +0 -1
- lionagi/core/agent/base_agent.py +0 -82
- lionagi/core/agent/eval/README.md +0 -1
- lionagi/core/agent/eval/evaluator.py +0 -1
- lionagi/core/agent/eval/vote.py +0 -40
- lionagi/core/agent/learn/learner.py +0 -59
- lionagi/core/agent/plan/unit_template.py +0 -1
- lionagi/core/collections/README.md +0 -23
- lionagi/core/collections/__init__.py +0 -16
- lionagi/core/collections/_logger.py +0 -312
- lionagi/core/collections/abc/README.md +0 -63
- lionagi/core/collections/abc/__init__.py +0 -53
- lionagi/core/collections/abc/component.py +0 -620
- lionagi/core/collections/abc/concepts.py +0 -277
- lionagi/core/collections/abc/exceptions.py +0 -136
- lionagi/core/collections/abc/util.py +0 -45
- lionagi/core/collections/exchange.py +0 -146
- lionagi/core/collections/flow.py +0 -416
- lionagi/core/collections/model.py +0 -465
- lionagi/core/collections/pile.py +0 -1232
- lionagi/core/collections/progression.py +0 -221
- lionagi/core/collections/util.py +0 -73
- lionagi/core/director/README.md +0 -1
- lionagi/core/director/direct.py +0 -298
- lionagi/core/director/director.py +0 -2
- lionagi/core/director/operations/select.py +0 -3
- lionagi/core/director/operations/utils.py +0 -6
- lionagi/core/engine/branch_engine.py +0 -361
- lionagi/core/engine/instruction_map_engine.py +0 -213
- lionagi/core/engine/sandbox_.py +0 -16
- lionagi/core/engine/script_engine.py +0 -89
- lionagi/core/executor/base_executor.py +0 -97
- lionagi/core/executor/graph_executor.py +0 -335
- lionagi/core/executor/neo4j_executor.py +0 -394
- lionagi/core/generic/README.md +0 -0
- lionagi/core/generic/edge_condition.py +0 -17
- lionagi/core/generic/hyperedge.py +0 -1
- lionagi/core/generic/tree.py +0 -49
- lionagi/core/generic/tree_node.py +0 -85
- lionagi/core/mail/__init__.py +0 -11
- lionagi/core/mail/mail.py +0 -26
- lionagi/core/mail/mail_manager.py +0 -185
- lionagi/core/mail/package.py +0 -49
- lionagi/core/mail/start_mail.py +0 -36
- lionagi/core/message/__init__.py +0 -18
- lionagi/core/message/action_request.py +0 -114
- lionagi/core/message/action_response.py +0 -121
- lionagi/core/message/assistant_response.py +0 -80
- lionagi/core/message/instruction.py +0 -194
- lionagi/core/message/message.py +0 -86
- lionagi/core/message/system.py +0 -71
- lionagi/core/message/util.py +0 -274
- lionagi/core/report/__init__.py +0 -4
- lionagi/core/report/base.py +0 -201
- lionagi/core/report/form.py +0 -212
- lionagi/core/report/report.py +0 -150
- lionagi/core/report/util.py +0 -15
- lionagi/core/rule/_default.py +0 -17
- lionagi/core/rule/action.py +0 -87
- lionagi/core/rule/base.py +0 -234
- lionagi/core/rule/boolean.py +0 -56
- lionagi/core/rule/choice.py +0 -48
- lionagi/core/rule/mapping.py +0 -82
- lionagi/core/rule/number.py +0 -73
- lionagi/core/rule/rulebook.py +0 -45
- lionagi/core/rule/string.py +0 -43
- lionagi/core/rule/util.py +0 -0
- lionagi/core/session/directive_mixin.py +0 -307
- lionagi/core/structure/__init__.py +0 -1
- lionagi/core/structure/chain.py +0 -1
- lionagi/core/structure/forest.py +0 -1
- lionagi/core/structure/graph.py +0 -1
- lionagi/core/structure/tree.py +0 -1
- lionagi/core/unit/__init__.py +0 -4
- lionagi/core/unit/parallel_unit.py +0 -234
- lionagi/core/unit/template/action.py +0 -65
- lionagi/core/unit/template/base.py +0 -35
- lionagi/core/unit/template/plan.py +0 -69
- lionagi/core/unit/template/predict.py +0 -95
- lionagi/core/unit/template/score.py +0 -108
- lionagi/core/unit/template/select.py +0 -91
- lionagi/core/unit/unit.py +0 -452
- lionagi/core/unit/unit_form.py +0 -290
- lionagi/core/unit/unit_mixin.py +0 -1166
- lionagi/core/unit/util.py +0 -103
- lionagi/core/validator/validator.py +0 -376
- lionagi/core/work/work.py +0 -59
- lionagi/core/work/work_edge.py +0 -102
- lionagi/core/work/work_function.py +0 -114
- lionagi/core/work/work_function_node.py +0 -50
- lionagi/core/work/work_queue.py +0 -90
- lionagi/core/work/work_task.py +0 -151
- lionagi/core/work/worker.py +0 -410
- lionagi/core/work/worker_engine.py +0 -208
- lionagi/core/work/worklog.py +0 -108
- lionagi/experimental/compressor/base.py +0 -47
- lionagi/experimental/compressor/llm_compressor.py +0 -265
- lionagi/experimental/compressor/llm_summarizer.py +0 -61
- lionagi/experimental/compressor/util.py +0 -70
- lionagi/experimental/directive/README.md +0 -1
- lionagi/experimental/directive/__init__.py +0 -19
- lionagi/experimental/directive/parser/base_parser.py +0 -294
- lionagi/experimental/directive/parser/base_syntax.txt +0 -200
- lionagi/experimental/directive/template/base_template.py +0 -71
- lionagi/experimental/directive/template/schema.py +0 -36
- lionagi/experimental/directive/tokenizer.py +0 -59
- lionagi/experimental/evaluator/README.md +0 -1
- lionagi/experimental/evaluator/ast_evaluator.py +0 -119
- lionagi/experimental/evaluator/base_evaluator.py +0 -213
- lionagi/experimental/knowledge/__init__.py +0 -0
- lionagi/experimental/knowledge/base.py +0 -10
- lionagi/experimental/knowledge/graph.py +0 -0
- lionagi/experimental/memory/__init__.py +0 -0
- lionagi/experimental/strategies/__init__.py +0 -0
- lionagi/experimental/strategies/base.py +0 -1
- lionagi/integrations/bridge/__init__.py +0 -4
- lionagi/integrations/bridge/autogen_/__init__.py +0 -0
- lionagi/integrations/bridge/autogen_/autogen_.py +0 -127
- lionagi/integrations/bridge/langchain_/__init__.py +0 -0
- lionagi/integrations/bridge/langchain_/documents.py +0 -138
- lionagi/integrations/bridge/langchain_/langchain_bridge.py +0 -68
- lionagi/integrations/bridge/llamaindex_/__init__.py +0 -0
- lionagi/integrations/bridge/llamaindex_/index.py +0 -36
- lionagi/integrations/bridge/llamaindex_/llama_index_bridge.py +0 -108
- lionagi/integrations/bridge/llamaindex_/llama_pack.py +0 -256
- lionagi/integrations/bridge/llamaindex_/node_parser.py +0 -92
- lionagi/integrations/bridge/llamaindex_/reader.py +0 -201
- lionagi/integrations/bridge/llamaindex_/textnode.py +0 -59
- lionagi/integrations/bridge/pydantic_/__init__.py +0 -0
- lionagi/integrations/bridge/pydantic_/pydantic_bridge.py +0 -7
- lionagi/integrations/bridge/transformers_/__init__.py +0 -0
- lionagi/integrations/bridge/transformers_/install_.py +0 -39
- lionagi/integrations/chunker/__init__.py +0 -0
- lionagi/integrations/chunker/chunk.py +0 -314
- lionagi/integrations/config/__init__.py +0 -4
- lionagi/integrations/config/mlx_configs.py +0 -1
- lionagi/integrations/config/oai_configs.py +0 -154
- lionagi/integrations/config/ollama_configs.py +0 -1
- lionagi/integrations/config/openrouter_configs.py +0 -74
- lionagi/integrations/langchain_/__init__.py +0 -0
- lionagi/integrations/llamaindex_/__init__.py +0 -0
- lionagi/integrations/loader/__init__.py +0 -0
- lionagi/integrations/loader/load.py +0 -257
- lionagi/integrations/loader/load_util.py +0 -214
- lionagi/integrations/provider/__init__.py +0 -11
- lionagi/integrations/provider/_mapping.py +0 -47
- lionagi/integrations/provider/litellm.py +0 -53
- lionagi/integrations/provider/mistralai.py +0 -1
- lionagi/integrations/provider/mlx_service.py +0 -55
- lionagi/integrations/provider/oai.py +0 -196
- lionagi/integrations/provider/ollama.py +0 -55
- lionagi/integrations/provider/openrouter.py +0 -170
- lionagi/integrations/provider/services.py +0 -138
- lionagi/integrations/provider/transformers.py +0 -108
- lionagi/integrations/storage/__init__.py +0 -3
- lionagi/integrations/storage/neo4j.py +0 -681
- lionagi/integrations/storage/storage_util.py +0 -302
- lionagi/integrations/storage/structure_excel.py +0 -291
- lionagi/integrations/storage/to_csv.py +0 -70
- lionagi/integrations/storage/to_excel.py +0 -91
- lionagi/libs/ln_api.py +0 -944
- lionagi/libs/ln_async.py +0 -208
- lionagi/libs/ln_context.py +0 -37
- lionagi/libs/ln_convert.py +0 -671
- lionagi/libs/ln_dataframe.py +0 -187
- lionagi/libs/ln_func_call.py +0 -1328
- lionagi/libs/ln_image.py +0 -114
- lionagi/libs/ln_knowledge_graph.py +0 -422
- lionagi/libs/ln_nested.py +0 -822
- lionagi/libs/ln_parse.py +0 -750
- lionagi/libs/ln_queue.py +0 -107
- lionagi/libs/ln_tokenize.py +0 -179
- lionagi/libs/ln_validate.py +0 -299
- lionagi/libs/special_tokens.py +0 -172
- lionagi/libs/sys_util.py +0 -710
- lionagi/lions/__init__.py +0 -0
- lionagi/lions/coder/__init__.py +0 -0
- lionagi/lions/coder/add_feature.py +0 -20
- lionagi/lions/coder/base_prompts.py +0 -22
- lionagi/lions/coder/code_form.py +0 -15
- lionagi/lions/coder/coder.py +0 -184
- lionagi/lions/coder/util.py +0 -101
- lionagi/lions/director/__init__.py +0 -0
- lionagi/lions/judge/__init__.py +0 -0
- lionagi/lions/judge/config.py +0 -8
- lionagi/lions/judge/data/__init__.py +0 -0
- lionagi/lions/judge/data/sample_codes.py +0 -526
- lionagi/lions/judge/data/sample_rurbic.py +0 -48
- lionagi/lions/judge/forms/__init__.py +0 -0
- lionagi/lions/judge/forms/code_analysis_form.py +0 -126
- lionagi/lions/judge/rubric.py +0 -34
- lionagi/lions/judge/services/__init__.py +0 -0
- lionagi/lions/judge/services/judge_code.py +0 -49
- lionagi/lions/researcher/__init__.py +0 -0
- lionagi/lions/researcher/data_source/__init__.py +0 -0
- lionagi/lions/researcher/data_source/finhub_.py +0 -192
- lionagi/lions/researcher/data_source/google_.py +0 -207
- lionagi/lions/researcher/data_source/wiki_.py +0 -98
- lionagi/lions/researcher/data_source/yfinance_.py +0 -21
- lionagi/operations/brainstorm.py +0 -87
- lionagi/operations/config.py +0 -6
- lionagi/operations/rank.py +0 -102
- lionagi/operations/score.py +0 -144
- lionagi/operations/select.py +0 -141
- lionagi-0.3.8.dist-info/METADATA +0 -241
- lionagi-0.3.8.dist-info/RECORD +0 -249
- /lionagi/{core/_setting → integrations/anthropic_/api_endpoints/messages/response}/__init__.py +0 -0
- /lionagi/{core/agent → integrations/groq_/api_endpoints}/__init__.py +0 -0
- /lionagi/{core/agent/eval → integrations/ollama_/api_endpoints/completion}/__init__.py +0 -0
- /lionagi/{core/agent/learn → integrations/ollama_/api_endpoints/embedding}/__init__.py +0 -0
- /lionagi/{core/agent/plan → integrations/openai_}/__init__.py +0 -0
- /lionagi/{core/director → integrations/openai_/api_endpoints/chat_completions/response}/__init__.py +0 -0
- /lionagi/{core/director/operations → integrations/openai_/image_token_calculator}/__init__.py +0 -0
- /lionagi/{core/engine → integrations/perplexity_/api_endpoints}/__init__.py +0 -0
- /lionagi/{core/executor → integrations/perplexity_/api_endpoints/chat_completions}/__init__.py +0 -0
- /lionagi/{core/generic/registry/component_registry → integrations/perplexity_/api_endpoints/chat_completions/request}/__init__.py +0 -0
- /lionagi/{core/rule → integrations/perplexity_/api_endpoints/chat_completions/response}/__init__.py +0 -0
- /lionagi/{core/unit/template → libs/compress}/__init__.py +0 -0
- /lionagi/{core/validator → libs/file}/__init__.py +0 -0
- /lionagi/{core/work → libs/func}/__init__.py +0 -0
- /lionagi/{experimental → libs/package}/__init__.py +0 -0
- /lionagi/{core/agent/plan/plan.py → libs/parse/params.py} +0 -0
- /lionagi/{experimental/compressor → protocols}/__init__.py +0 -0
- /lionagi/{experimental/directive/parser → protocols/adapters}/__init__.py +0 -0
- /lionagi/{experimental/directive/template → protocols/registries}/__init__.py +0 -0
- /lionagi/{experimental/evaluator → strategies}/__init__.py +0 -0
- {lionagi-0.3.8.dist-info → lionagi-0.5.0.dist-info/licenses}/LICENSE +0 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
"""
|
2
|
+
Copyright 2024 HaiyangLi
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
"""
|
16
|
+
|
17
|
+
import asyncio
|
18
|
+
|
19
|
+
from lionagi.integrations.litellm_.imodel import iModel
|
20
|
+
|
21
|
+
from .models import PerplexityScores
|
22
|
+
|
23
|
+
|
24
|
+
async def compute_perplexity(
    imodel: iModel,
    initial_context: str = None,
    tokens: list[str] = None,
    system_msg: str = None,
    n_samples: int = 1,  # number of samples used for the computation
    use_residue: bool = True,  # whether to use residue for the last sample
    **kwargs,
) -> list[PerplexityScores]:
    """
    Request log-probabilities for one or more token samples concurrently.

    The token list is cut into ``n_samples`` growing prefixes (each prefix is
    ``sample_token_len`` tokens longer than the previous one). When the token
    count does not divide evenly and ``use_residue`` is set, the trailing
    ``residue`` tokens are added as one extra sample. Every sample is joined
    with spaces, prefixed with ``initial_context`` and sent to
    ``imodel.invoke`` with ``logprobs=True``.

    Args:
        imodel: Model wrapper exposing an awaitable ``invoke`` method.
        initial_context: Text prepended to every sample ("" when falsy).
        tokens: Tokens to evaluate. Although the default is None, a sized
            sequence is required because ``len(tokens)`` is evaluated.
        system_msg: Optional system message placed before the user message.
        n_samples: Number of prefix samples; a falsy value means one sample
            per token.
        use_residue: Whether to add the leftover tail tokens as a sample.
        **kwargs: Extra keyword arguments forwarded to ``imodel.invoke``.

    Returns:
        One ``PerplexityScores`` per sample, in sample order.
    """
    prefix = initial_context or ""

    n_samples = n_samples or len(tokens)
    sample_token_len, residue = divmod(len(tokens), n_samples)

    if n_samples == 1:
        samples = [tokens]
    else:
        samples = []
        for step in range(1, n_samples + 1):
            samples.append(tokens[: step * sample_token_len])
        # residue is always 0 when n_samples == 1, so this only matters here
        if use_residue and residue != 0:
            samples.append(tokens[-residue:])

    # Build every prompt before scheduling any request.
    prompts = [prefix + " ".join(sample) for sample in samples]

    def _build_messages(prompt: str) -> list[dict]:
        msgs = []
        if system_msg:
            msgs.append({"role": "system", "content": system_msg})
        msgs.append({"role": "user", "content": prompt})
        return msgs

    pending = [
        asyncio.create_task(
            imodel.invoke(
                messages=_build_messages(prompt),
                logprobs=True,
                max_tokens=sample_token_len,
                **kwargs,
            )
        )
        for prompt in prompts
    ]

    responses = await asyncio.gather(*pending)

    # gather preserves task order, so responses line up with samples
    return [
        PerplexityScores(
            completion_response=response,
            original_tokens=sample,
            n_samples=n_samples,
        )
        for response, sample in zip(responses, samples)
    ]
|
@@ -0,0 +1,98 @@
|
|
1
|
+
from typing import Any, Final, Literal, TypedDict, TypeVar, Union
|
2
|
+
|
3
|
+
|
4
|
+
class UndefinedType:
    """Falsy sentinel type whose instances print as ``UNDEFINED``."""

    # Only a single attribute slot is allocated per instance.
    __slots__ = ["undefined"]

    def __init__(self) -> None:
        self.undefined = True

    def __repr__(self) -> Literal["UNDEFINED"]:
        return "UNDEFINED"

    def __bool__(self) -> Literal[False]:
        # The sentinel always evaluates as false in boolean context.
        return False

    def __deepcopy__(self, memo):
        # Deep copies hand back the same object so the sentinel stays shared.
        return self


# Module-level sentinel instance.
UNDEFINED = UndefinedType()
|
22
|
+
|
23
|
+
# Numeric type aliases: a type may be given as a name or as the type object.
NUM_TYPE_LITERAL = Literal["int", "float", "complex"]
NUM_TYPES = Union[type[int], type[float], type[complex], NUM_TYPE_LITERAL]
NumericType = TypeVar("NumericType", int, float, complex)

# Lookup from numeric type name to the builtin type.
TYPE_MAP = dict(int=int, float=float, complex=complex)

# Regular expressions for the supported numeric notations.
# NOTE(review): insertion order is preserved in case callers try the
# patterns sequentially; confirm against the consumer before reordering.
PATTERNS = {
    "scientific": r"[-+]?(?:\d*\.)?\d+[eE][-+]?\d+",
    "complex_sci": (
        r"[-+]?(?:\d*\.)?\d+(?:[eE][-+]?\d+)?"
        r"[-+](?:\d*\.)?\d+(?:[eE][-+]?\d+)?[jJ]"
    ),
    "complex": r"[-+]?(?:\d*\.)?\d+[-+](?:\d*\.)?\d+[jJ]",
    "pure_imaginary": r"[-+]?(?:\d*\.)?\d*[jJ]",
    "percentage": r"[-+]?(?:\d*\.)?\d+%",
    "fraction": r"[-+]?\d+/\d+",
    "decimal": r"[-+]?(?:\d*\.)?\d+",
    "special": r"[-+]?(?:inf|infinity|nan)",
}


# Character replacements: single quote -> escaped double quote, and
# newline / carriage return / tab -> their backslash-escaped spellings.
md_json_char_map = {
    "'": '\\"',
    "\n": "\\n",
    "\r": "\\r",
    "\t": "\\t",
}

# Python type name -> JSON schema type name.
py_json_msp = dict(
    str="string",
    int="number",
    float="number",
    list="array",
    tuple="array",
    bool="boolean",
    dict="object",
)


# Strings accepted as boolean true.
TRUE_VALUES: Final[frozenset[str]] = frozenset(
    {
        "true",
        "1",
        "yes",
        "y",
        "on",
        "correct",
        "t",
        "enabled",
        "enable",
        "active",
        "activated",
    }
)

# Strings accepted as boolean false (includes "missing value" spellings).
FALSE_VALUES: Final[frozenset[str]] = frozenset(
    {
        "false",
        "0",
        "no",
        "n",
        "off",
        "incorrect",
        "f",
        "disabled",
        "disable",
        "inactive",
        "deactivated",
        "none",
        "null",
        "n/a",
        "na",
    }
)
|
93
|
+
|
94
|
+
|
95
|
+
class KeysDict(TypedDict, total=False):
    """Typed dictionary describing a keys mapping; no field is required."""

    # Placeholder entry standing for any key-type pair.
    key: Any
|
@@ -0,0 +1,265 @@
|
|
1
|
+
import math
|
2
|
+
from collections.abc import Callable
|
3
|
+
from typing import Any, Literal
|
4
|
+
|
5
|
+
|
6
|
+
def chunk_by_chars(
    text: str, chunk_size: int = 2048, overlap: float = 0, threshold: int = 256
) -> list[str]:
    """
    Split a text into chunks of approximately equal size, with optional overlap.

    The number of chunks is ``ceil(len(text) / chunk_size)``. Each chunk is
    extended on both sides by ``int(chunk_size * overlap / 2)`` characters.
    A trailing piece that is not longer than ``threshold`` is merged into the
    previous chunk instead of being emitted on its own.

    Args:
        text (str): The input text to be chunked.
        chunk_size (int, optional): The target size for each chunk. Must be
            positive. Defaults to 2048.
        overlap (float, optional): The fraction of overlap between chunks. Defaults to 0.
        threshold (int, optional): The minimum size for the last chunk. Defaults to 256.

    Returns:
        List[str]: A list of text chunks. Text that fits in a single chunk
        (including the empty string) is returned as a one-element list.

    Raises:
        ValueError: If ``chunk_size`` is not positive or an error occurs
            during the chunking process.

    Examples:
        >>> chunk_by_chars("abcdefghij", chunk_size=4, threshold=0)
        ['abcd', 'efgh', 'ij']
        >>> chunk_by_chars("")
        ['']
    """
    if chunk_size <= 0:
        # A non-positive size would divide by zero or yield a negative count.
        raise ValueError(
            f"chunk_size must be a positive number, got {chunk_size!r}"
        )

    try:
        n_chunks = math.ceil(len(text) / chunk_size)
        overlap_size = int(chunk_size * overlap / 2)

        # n_chunks is 0 for empty text; treat it like the single-chunk case
        # instead of falling through to the multi-part splitter.
        if n_chunks <= 1:
            return [text]
        if n_chunks == 2:
            return _chunk_two_parts(text, chunk_size, overlap_size, threshold)
        return _chunk_multiple_parts(
            text, chunk_size, overlap_size, n_chunks, threshold
        )
    except Exception as e:
        raise ValueError(
            f"An error occurred while chunking the text: {e}"
        ) from e
|
48
|
+
|
49
|
+
|
50
|
+
def _chunk_two_parts(
|
51
|
+
text: str, chunk_size: int, overlap_size: int, threshold: int
|
52
|
+
) -> list[str]:
|
53
|
+
"""Handle chunking for two parts."""
|
54
|
+
first_chunk = text[: chunk_size + overlap_size]
|
55
|
+
if len(text) - chunk_size > threshold:
|
56
|
+
return [first_chunk, text[chunk_size - overlap_size :]]
|
57
|
+
return [text]
|
58
|
+
|
59
|
+
|
60
|
+
def _chunk_multiple_parts(
|
61
|
+
text: str,
|
62
|
+
chunk_size: int,
|
63
|
+
overlap_size: int,
|
64
|
+
n_chunks: int,
|
65
|
+
threshold: int,
|
66
|
+
) -> list[str]:
|
67
|
+
"""Handle chunking for more than two parts."""
|
68
|
+
chunks = [text[: chunk_size + overlap_size]]
|
69
|
+
|
70
|
+
for i in range(1, n_chunks - 1):
|
71
|
+
start_idx = chunk_size * i - overlap_size
|
72
|
+
end_idx = chunk_size * (i + 1) + overlap_size
|
73
|
+
chunks.append(text[start_idx:end_idx])
|
74
|
+
|
75
|
+
last_chunk_start = chunk_size * (n_chunks - 1) - overlap_size
|
76
|
+
if len(text) - last_chunk_start > threshold:
|
77
|
+
chunks.append(text[last_chunk_start:])
|
78
|
+
else:
|
79
|
+
chunks[-1] += text[chunk_size * (n_chunks - 1) + overlap_size :]
|
80
|
+
|
81
|
+
return chunks
|
82
|
+
|
83
|
+
|
84
|
+
def chunk_by_tokens(
|
85
|
+
tokens: list[str],
|
86
|
+
chunk_size: int = 1024,
|
87
|
+
overlap: float = 0,
|
88
|
+
threshold: int = 128,
|
89
|
+
return_tokens: bool = False,
|
90
|
+
) -> list[str | list[str]]:
|
91
|
+
"""
|
92
|
+
Split a list of tokens into chunks of approximately equal size, with optional overlap.
|
93
|
+
|
94
|
+
This function divides the input tokens into chunks based on the specified
|
95
|
+
chunk size. It handles different scenarios based on the number of chunks
|
96
|
+
required and provides options for overlap between chunks.
|
97
|
+
|
98
|
+
Args:
|
99
|
+
tokens (list[str]): The input list of tokens to be chunked.
|
100
|
+
chunk_size (int, optional): The target size for each chunk. Defaults to 1024.
|
101
|
+
overlap (float, optional): The fraction of overlap between chunks. Defaults to 0.
|
102
|
+
threshold (int, optional): The minimum size for the last chunk. Defaults to 128.
|
103
|
+
return_tokens (bool, optional): If True, return chunks as lists of tokens;
|
104
|
+
if False, return as joined strings. Defaults to False.
|
105
|
+
|
106
|
+
Returns:
|
107
|
+
list[Union[str, list[str]]]: A list of chunked tokens, either as strings or token lists.
|
108
|
+
|
109
|
+
Raises:
|
110
|
+
ValueError: If an error occurs during the chunking process.
|
111
|
+
|
112
|
+
Examples:
|
113
|
+
>>> tokens = ["This", "is", "a", "sample", "text", "for", "chunking."]
|
114
|
+
>>> chunks = chunk_by_tokens(tokens, chunk_size=3, overlap=0.2)
|
115
|
+
>>> print(chunks)
|
116
|
+
['This is a', 'a sample text', 'text for chunking.']
|
117
|
+
"""
|
118
|
+
try:
|
119
|
+
n_chunks = math.ceil(len(tokens) / chunk_size)
|
120
|
+
overlap_size = int(overlap * chunk_size / 2)
|
121
|
+
residue = len(tokens) % chunk_size
|
122
|
+
|
123
|
+
if n_chunks == 1:
|
124
|
+
return _process_single_chunk(tokens, return_tokens)
|
125
|
+
elif n_chunks == 2:
|
126
|
+
return _chunk_token_two_parts(
|
127
|
+
tokens,
|
128
|
+
chunk_size,
|
129
|
+
overlap_size,
|
130
|
+
threshold,
|
131
|
+
residue,
|
132
|
+
return_tokens,
|
133
|
+
)
|
134
|
+
else:
|
135
|
+
return _chunk_token_multiple_parts(
|
136
|
+
tokens,
|
137
|
+
chunk_size,
|
138
|
+
overlap_size,
|
139
|
+
n_chunks,
|
140
|
+
threshold,
|
141
|
+
residue,
|
142
|
+
return_tokens,
|
143
|
+
)
|
144
|
+
except Exception as e:
|
145
|
+
raise ValueError(f"An error occurred while chunking the tokens: {e}")
|
146
|
+
|
147
|
+
|
148
|
+
def _process_single_chunk(
|
149
|
+
tokens: list[str], return_tokens: bool
|
150
|
+
) -> list[str | list[str]]:
|
151
|
+
"""Handle processing for a single chunk."""
|
152
|
+
return [tokens] if return_tokens else [" ".join(tokens).strip()]
|
153
|
+
|
154
|
+
|
155
|
+
def _chunk_token_two_parts(
|
156
|
+
tokens: list[str],
|
157
|
+
chunk_size: int,
|
158
|
+
overlap_size: int,
|
159
|
+
threshold: int,
|
160
|
+
residue: int,
|
161
|
+
return_tokens: bool,
|
162
|
+
) -> list[str | list[str]]:
|
163
|
+
"""Handle chunking for two parts."""
|
164
|
+
chunks = [tokens[: chunk_size + overlap_size]]
|
165
|
+
if residue > threshold:
|
166
|
+
chunks.append(tokens[chunk_size - overlap_size :])
|
167
|
+
else:
|
168
|
+
return _process_single_chunk(tokens, return_tokens)
|
169
|
+
return _format_chunks(chunks, return_tokens)
|
170
|
+
|
171
|
+
|
172
|
+
def _chunk_token_multiple_parts(
|
173
|
+
tokens: list[str],
|
174
|
+
chunk_size: int,
|
175
|
+
overlap_size: int,
|
176
|
+
n_chunks: int,
|
177
|
+
threshold: int,
|
178
|
+
residue: int,
|
179
|
+
return_tokens: bool,
|
180
|
+
) -> list[str | list[str]]:
|
181
|
+
"""Handle chunking for more than two parts."""
|
182
|
+
chunks = [tokens[: chunk_size + overlap_size]]
|
183
|
+
for i in range(1, n_chunks - 1):
|
184
|
+
start_idx = chunk_size * i - overlap_size
|
185
|
+
end_idx = chunk_size * (i + 1) + overlap_size
|
186
|
+
chunks.append(tokens[start_idx:end_idx])
|
187
|
+
|
188
|
+
last_chunk_start = chunk_size * (n_chunks - 1) - overlap_size
|
189
|
+
if len(tokens) - last_chunk_start > threshold:
|
190
|
+
chunks.append(tokens[last_chunk_start:])
|
191
|
+
else:
|
192
|
+
chunks[-1] += tokens[-residue:]
|
193
|
+
|
194
|
+
return _format_chunks(chunks, return_tokens)
|
195
|
+
|
196
|
+
|
197
|
+
def _format_chunks(
|
198
|
+
chunks: list[list[str]], return_tokens: bool
|
199
|
+
) -> list[str | list[str]]:
|
200
|
+
"""Format chunks based on the return_tokens flag."""
|
201
|
+
return (
|
202
|
+
chunks
|
203
|
+
if return_tokens
|
204
|
+
else [" ".join(chunk).strip() for chunk in chunks]
|
205
|
+
)
|
206
|
+
|
207
|
+
|
208
|
+
def chunk_content(
    content: str,
    chunk_by: Literal["chars", "tokens"] = "chars",
    tokenizer: Callable[[str], list[str]] = str.split,
    chunk_size: int = 1024,
    overlap: float = 0,
    threshold: int = 256,
    metadata: dict[str, Any] | None = None,
    return_tokens: bool = False,
    **kwargs: Any,
) -> list[dict[str, Any]]:
    """
    Split content into chunks and add metadata.

    This function takes a string content, splits it into chunks using the
    selected chunking function, and adds metadata to each chunk.

    Args:
        content (str): The content to be chunked.
        chunk_by(str): The method to use for chunking: "chars" or "tokens".
            Any value other than "tokens" selects character chunking.
        tokenizer (Callable): The function to use for tokenization. defaults to str.split.
            Only used when chunking by tokens.
        chunk_size (int): The target size for each chunk.
        overlap (float): The fraction of overlap between chunks.
        threshold (int): The minimum size for the last chunk.
        metadata (Dict[str, Any]): Metadata to be included with each chunk.
            Defaults to no extra metadata. Its entries are merged last, so
            they override the generated keys on a name clash.
        return_tokens (bool): When chunking by tokens, return each chunk as a
            list of tokens instead of a joined string.
        kwargs for tokenizer, if needed.

    Returns:
        List[Dict[str, Any]]: A list of dictionaries, each representing a chunk with metadata.
        Each entry has "chunk_content", a 1-based "chunk_id", "total_chunks"
        and "chunk_size" (``len`` of the chunk) plus the metadata entries.
    """
    # None replaces the former shared mutable ``{}`` default.
    extra = {} if metadata is None else metadata

    if chunk_by == "tokens":
        chunks = chunk_by_tokens(
            tokens=tokenizer(content, **kwargs),
            chunk_size=chunk_size,
            overlap=overlap,
            threshold=threshold,
            return_tokens=return_tokens,
        )
    else:
        chunks = chunk_by_chars(
            text=content,
            chunk_size=chunk_size,
            overlap=overlap,
            threshold=threshold,
        )

    total_chunks = len(chunks)
    return [
        {
            "chunk_content": chunk,
            "chunk_id": chunk_id,
            "total_chunks": total_chunks,
            "chunk_size": len(chunk),
            **extra,
        }
        for chunk_id, chunk in enumerate(chunks, start=1)
    ]
|
@@ -0,0 +1,114 @@
|
|
1
|
+
import logging
|
2
|
+
from pathlib import Path
|
3
|
+
from shutil import copy2
|
4
|
+
|
5
|
+
|
6
|
+
def copy_file(src: Path | str, dest: Path | str) -> None:
|
7
|
+
"""
|
8
|
+
Copy a file from a source path to a destination path.
|
9
|
+
|
10
|
+
Args:
|
11
|
+
src: The source file path.
|
12
|
+
dest: The destination file path.
|
13
|
+
|
14
|
+
Raises:
|
15
|
+
FileNotFoundError: If the source file does not exist or is not
|
16
|
+
a file.
|
17
|
+
PermissionError: If there are insufficient permissions to copy
|
18
|
+
the file.
|
19
|
+
OSError: If there's an OS-level error during the copy operation.
|
20
|
+
"""
|
21
|
+
src_path, dest_path = Path(src), Path(dest)
|
22
|
+
if not src_path.is_file():
|
23
|
+
raise FileNotFoundError(f"{src_path} does not exist or is not a file.")
|
24
|
+
|
25
|
+
try:
|
26
|
+
dest_path.parent.mkdir(parents=True, exist_ok=True)
|
27
|
+
copy2(src_path, dest_path)
|
28
|
+
except PermissionError as e:
|
29
|
+
raise PermissionError(
|
30
|
+
f"Permission denied when copying {src_path} to {dest_path}"
|
31
|
+
) from e
|
32
|
+
except OSError as e:
|
33
|
+
raise OSError(f"Failed to copy {src_path} to {dest_path}: {e}") from e
|
34
|
+
|
35
|
+
|
36
|
+
def get_file_size(path: Path | str) -> int:
|
37
|
+
"""
|
38
|
+
Get the size of a file or total size of files in a directory.
|
39
|
+
|
40
|
+
Args:
|
41
|
+
path: The file or directory path.
|
42
|
+
|
43
|
+
Returns:
|
44
|
+
The size in bytes.
|
45
|
+
|
46
|
+
Raises:
|
47
|
+
FileNotFoundError: If the path does not exist.
|
48
|
+
PermissionError: If there are insufficient permissions
|
49
|
+
to access the path.
|
50
|
+
"""
|
51
|
+
path = Path(path)
|
52
|
+
try:
|
53
|
+
if path.is_file():
|
54
|
+
return path.stat().st_size
|
55
|
+
elif path.is_dir():
|
56
|
+
return sum(
|
57
|
+
f.stat().st_size for f in path.rglob("*") if f.is_file()
|
58
|
+
)
|
59
|
+
else:
|
60
|
+
raise FileNotFoundError(f"{path} does not exist.")
|
61
|
+
except PermissionError as e:
|
62
|
+
raise PermissionError(
|
63
|
+
f"Permission denied when accessing {path}"
|
64
|
+
) from e
|
65
|
+
|
66
|
+
|
67
|
+
def list_files(
|
68
|
+
dir_path: Path | str, extension: str | None = None
|
69
|
+
) -> list[Path]:
|
70
|
+
"""
|
71
|
+
List all files in a specified directory with an optional extension
|
72
|
+
filter, including files in subdirectories.
|
73
|
+
|
74
|
+
Args:
|
75
|
+
dir_path: The directory path where files are listed.
|
76
|
+
extension: Filter files by extension.
|
77
|
+
|
78
|
+
Returns:
|
79
|
+
A list of Path objects representing files in the directory.
|
80
|
+
|
81
|
+
Raises:
|
82
|
+
NotADirectoryError: If the provided dir_path is not a directory.
|
83
|
+
"""
|
84
|
+
dir_path = Path(dir_path)
|
85
|
+
if not dir_path.is_dir():
|
86
|
+
raise NotADirectoryError(f"{dir_path} is not a directory.")
|
87
|
+
|
88
|
+
pattern = f"*.{extension}" if extension else "*"
|
89
|
+
return [f for f in dir_path.rglob(pattern) if f.is_file()]
|
90
|
+
|
91
|
+
|
92
|
+
def read_file(path: Path | str, /) -> str:
|
93
|
+
"""
|
94
|
+
Read the contents of a file.
|
95
|
+
|
96
|
+
Args:
|
97
|
+
path: The path to the file to read.
|
98
|
+
|
99
|
+
Returns:
|
100
|
+
str: The contents of the file.
|
101
|
+
|
102
|
+
Raises:
|
103
|
+
FileNotFoundError: If the file does not exist.
|
104
|
+
PermissionError: If there are insufficient permissions to read
|
105
|
+
the file.
|
106
|
+
"""
|
107
|
+
try:
|
108
|
+
return Path(path).read_text(encoding="utf-8")
|
109
|
+
except FileNotFoundError as e:
|
110
|
+
logging.error(f"File not found: {path}: {e}")
|
111
|
+
raise
|
112
|
+
except PermissionError as e:
|
113
|
+
logging.error(f"Permission denied when reading file: {path}: {e}")
|
114
|
+
raise
|