clarifai 11.3.0rc2__py3-none-any.whl → 11.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/__init__.py +1 -1
- clarifai/cli/__main__.py +1 -1
- clarifai/cli/base.py +144 -136
- clarifai/cli/compute_cluster.py +45 -31
- clarifai/cli/deployment.py +93 -76
- clarifai/cli/model.py +578 -180
- clarifai/cli/nodepool.py +100 -82
- clarifai/client/__init__.py +12 -2
- clarifai/client/app.py +973 -911
- clarifai/client/auth/helper.py +345 -342
- clarifai/client/auth/register.py +7 -7
- clarifai/client/auth/stub.py +107 -106
- clarifai/client/base.py +185 -178
- clarifai/client/compute_cluster.py +214 -180
- clarifai/client/dataset.py +793 -698
- clarifai/client/deployment.py +55 -50
- clarifai/client/input.py +1223 -1088
- clarifai/client/lister.py +47 -45
- clarifai/client/model.py +1939 -1717
- clarifai/client/model_client.py +525 -502
- clarifai/client/module.py +82 -73
- clarifai/client/nodepool.py +358 -213
- clarifai/client/runner.py +58 -0
- clarifai/client/search.py +342 -309
- clarifai/client/user.py +419 -414
- clarifai/client/workflow.py +294 -274
- clarifai/constants/dataset.py +11 -17
- clarifai/constants/model.py +8 -2
- clarifai/datasets/export/inputs_annotations.py +233 -217
- clarifai/datasets/upload/base.py +63 -51
- clarifai/datasets/upload/features.py +43 -38
- clarifai/datasets/upload/image.py +237 -207
- clarifai/datasets/upload/loaders/coco_captions.py +34 -32
- clarifai/datasets/upload/loaders/coco_detection.py +72 -65
- clarifai/datasets/upload/loaders/imagenet_classification.py +57 -53
- clarifai/datasets/upload/loaders/xview_detection.py +274 -132
- clarifai/datasets/upload/multimodal.py +55 -46
- clarifai/datasets/upload/text.py +55 -47
- clarifai/datasets/upload/utils.py +250 -234
- clarifai/errors.py +51 -50
- clarifai/models/api.py +260 -238
- clarifai/modules/css.py +50 -50
- clarifai/modules/pages.py +33 -33
- clarifai/rag/rag.py +312 -288
- clarifai/rag/utils.py +91 -84
- clarifai/runners/models/model_builder.py +906 -802
- clarifai/runners/models/model_class.py +370 -331
- clarifai/runners/models/model_run_locally.py +459 -419
- clarifai/runners/models/model_runner.py +170 -162
- clarifai/runners/models/model_servicer.py +78 -70
- clarifai/runners/server.py +111 -101
- clarifai/runners/utils/code_script.py +225 -187
- clarifai/runners/utils/const.py +4 -1
- clarifai/runners/utils/data_types/__init__.py +12 -0
- clarifai/runners/utils/data_types/data_types.py +598 -0
- clarifai/runners/utils/data_utils.py +387 -440
- clarifai/runners/utils/loader.py +247 -227
- clarifai/runners/utils/method_signatures.py +411 -386
- clarifai/runners/utils/openai_convertor.py +108 -109
- clarifai/runners/utils/serializers.py +175 -179
- clarifai/runners/utils/url_fetcher.py +35 -35
- clarifai/schema/search.py +56 -63
- clarifai/urls/helper.py +125 -102
- clarifai/utils/cli.py +129 -123
- clarifai/utils/config.py +127 -87
- clarifai/utils/constants.py +49 -0
- clarifai/utils/evaluation/helpers.py +503 -466
- clarifai/utils/evaluation/main.py +431 -393
- clarifai/utils/evaluation/testset_annotation_parser.py +154 -144
- clarifai/utils/logging.py +324 -306
- clarifai/utils/misc.py +60 -56
- clarifai/utils/model_train.py +165 -146
- clarifai/utils/protobuf.py +126 -103
- clarifai/versions.py +3 -1
- clarifai/workflows/export.py +48 -50
- clarifai/workflows/utils.py +39 -36
- clarifai/workflows/validate.py +55 -43
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/METADATA +16 -6
- clarifai-11.4.0.dist-info/RECORD +109 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/WHEEL +1 -1
- clarifai/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/__pycache__/errors.cpython-310.pyc +0 -0
- clarifai/__pycache__/errors.cpython-311.pyc +0 -0
- clarifai/__pycache__/versions.cpython-310.pyc +0 -0
- clarifai/__pycache__/versions.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
- clarifai/client/cli/__init__.py +0 -0
- clarifai/client/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/base_cli.py +0 -88
- clarifai/client/cli/model_cli.py +0 -29
- clarifai/constants/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/coco_detection.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/imagenet_classification.cpython-311.pyc +0 -0
- clarifai/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/modules/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-311.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/dockerfile_template/Dockerfile.cpu.template +0 -31
- clarifai/runners/dockerfile_template/Dockerfile.cuda.template +0 -42
- clarifai/runners/dockerfile_template/Dockerfile.nim +0 -71
- clarifai/runners/models/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310-pytest-7.1.2.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_upload.cpython-310.pyc +0 -0
- clarifai/runners/models/base_typed_model.py +0 -238
- clarifai/runners/models/model_class_refract.py +0 -80
- clarifai/runners/models/model_upload.py +0 -607
- clarifai/runners/models/temp.py +0 -25
- clarifai/runners/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_handler.py +0 -231
- clarifai/runners/utils/data_handler_refract.py +0 -213
- clarifai/runners/utils/data_types.py +0 -469
- clarifai/runners/utils/logger.py +0 -0
- clarifai/runners/utils/openai_format.py +0 -87
- clarifai/schema/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/helpers.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
- clarifai-11.3.0rc2.dist-info/RECORD +0 -322
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/entry_points.txt +0 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info/licenses}/LICENSE +0 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/top_level.txt +0 -0
clarifai/rag/utils.py
CHANGED
@@ -7,7 +7,7 @@ import requests
|
|
7
7
|
|
8
8
|
## TODO: Make this token-aware.
|
9
9
|
def convert_messages_to_str(messages: List[dict]) -> str:
|
10
|
-
|
10
|
+
"""convert messages in OpenAI API format into a single string.
|
11
11
|
|
12
12
|
Args:
|
13
13
|
messages List[dict]: A list of dictionary in the following format:
|
@@ -19,100 +19,107 @@ def convert_messages_to_str(messages: List[dict]) -> str:
|
|
19
19
|
]
|
20
20
|
```
|
21
21
|
"""
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
22
|
+
final_str = ""
|
23
|
+
for msg in messages:
|
24
|
+
if "role" in msg and "content" in msg:
|
25
|
+
role = msg.get("role", "")
|
26
|
+
content = msg.get("content", "")
|
27
|
+
final_str += f"\n\n{role}: {content}"
|
28
|
+
return final_str
|
29
29
|
|
30
30
|
|
31
31
|
def format_assistant_message(raw_text: str) -> dict:
|
32
|
-
|
32
|
+
return {"role": "assistant", "content": raw_text}
|
33
33
|
|
34
34
|
|
35
35
|
def load_documents(file_path: str = None, folder_path: str = None, url: str = None) -> List[any]:
|
36
|
-
|
36
|
+
"""Loads documents from a local directory or public url or local filename.
|
37
37
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
38
|
+
Args:
|
39
|
+
file_path (str): The path to the filename.
|
40
|
+
folder_path (str): The path to the folder.
|
41
|
+
url (str): The url to the file.
|
42
|
+
"""
|
43
|
+
# check import packages
|
44
|
+
try:
|
45
|
+
from llama_index.core import Document, SimpleDirectoryReader
|
46
|
+
from llama_index.core.readers.download import download_loader
|
47
|
+
except ImportError:
|
48
|
+
raise ImportError(
|
49
|
+
"Could not import llama index package. "
|
50
|
+
"Please install it with `pip install llama-index-core==0.10.1`."
|
51
|
+
)
|
52
|
+
# document loaders for filepath
|
53
|
+
if file_path:
|
54
|
+
if file_path.endswith(".pdf"):
|
55
|
+
PDFReader = download_loader("PDFReader")
|
56
|
+
loader = PDFReader()
|
57
|
+
documents = loader.load_data(file=Path(file_path))
|
58
|
+
elif file_path.endswith(".docx"):
|
59
|
+
docReader = download_loader("DocxReader")
|
60
|
+
loader = docReader()
|
61
|
+
documents = loader.load_data(file=Path(file_path))
|
62
|
+
elif file_path.endswith(".txt"):
|
63
|
+
with open(file_path, 'r') as file:
|
64
|
+
text_content = file.read()
|
65
|
+
documents = [Document(text=text_content)]
|
66
|
+
else:
|
67
|
+
raise ValueError("Only .pdf, .docx, and .txt files are supported.")
|
66
68
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
69
|
+
# document loaders for folderpath
|
70
|
+
if folder_path:
|
71
|
+
documents = SimpleDirectoryReader(
|
72
|
+
input_dir=Path(folder_path), required_exts=[".pdf", ".docx", ".txt"]
|
73
|
+
).load_data()
|
71
74
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
75
|
+
# document loaders for url
|
76
|
+
if url:
|
77
|
+
response = requests.get(url)
|
78
|
+
if response.status_code != 200:
|
79
|
+
raise ValueError(f"Invalid url {url}.")
|
80
|
+
# for text files
|
81
|
+
try:
|
82
|
+
documents = [Document(text=response.content)]
|
83
|
+
# for pdf files
|
84
|
+
except Exception:
|
85
|
+
# check import packages
|
86
|
+
try:
|
87
|
+
from pypdf import PdfReader
|
88
|
+
except ImportError:
|
89
|
+
raise ImportError(
|
90
|
+
"Could not import pypdf package. "
|
91
|
+
"Please install it with `pip install pypdf==3.17.4`."
|
92
|
+
)
|
93
|
+
documents = []
|
94
|
+
pdf_file = PdfReader(io.BytesIO(response.content))
|
95
|
+
num_pages = len(pdf_file.pages)
|
96
|
+
for page in range(num_pages):
|
97
|
+
page_text = pdf_file.pages[page].extract_text()
|
98
|
+
documents.append(Document(text=page_text))
|
99
|
+
else:
|
100
|
+
raise ValueError(f"Invalid url {url}.")
|
96
101
|
|
97
|
-
|
102
|
+
return documents
|
98
103
|
|
99
104
|
|
100
105
|
def split_document(text: str, chunk_size: int, chunk_overlap: int, **kwargs) -> List[str]:
|
101
|
-
|
106
|
+
"""Splits a document into chunks of text.
|
102
107
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
108
|
+
Args:
|
109
|
+
text (str): The text to split.
|
110
|
+
chunk_size (int): The size of each chunk.
|
111
|
+
chunk_overlap (int): The amount of overlap between each chunk.
|
112
|
+
**kwargs: Additional keyword arguments for the SentenceSplitter.
|
113
|
+
"""
|
114
|
+
# check import packages
|
115
|
+
try:
|
116
|
+
from llama_index.core.node_parser.text import SentenceSplitter
|
117
|
+
except ImportError:
|
118
|
+
raise ImportError(
|
119
|
+
"Could not import llama index package. "
|
120
|
+
"Please install it with `pip install llama-index-core==0.10.24`."
|
121
|
+
)
|
122
|
+
# document
|
123
|
+
text_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
|
124
|
+
text_chunks = text_parser.split_text(text)
|
125
|
+
return text_chunks
|