clarifai 11.3.0rc2__py3-none-any.whl → 11.4.0__py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registry.
- clarifai/__init__.py +1 -1
- clarifai/cli/__main__.py +1 -1
- clarifai/cli/base.py +144 -136
- clarifai/cli/compute_cluster.py +45 -31
- clarifai/cli/deployment.py +93 -76
- clarifai/cli/model.py +578 -180
- clarifai/cli/nodepool.py +100 -82
- clarifai/client/__init__.py +12 -2
- clarifai/client/app.py +973 -911
- clarifai/client/auth/helper.py +345 -342
- clarifai/client/auth/register.py +7 -7
- clarifai/client/auth/stub.py +107 -106
- clarifai/client/base.py +185 -178
- clarifai/client/compute_cluster.py +214 -180
- clarifai/client/dataset.py +793 -698
- clarifai/client/deployment.py +55 -50
- clarifai/client/input.py +1223 -1088
- clarifai/client/lister.py +47 -45
- clarifai/client/model.py +1939 -1717
- clarifai/client/model_client.py +525 -502
- clarifai/client/module.py +82 -73
- clarifai/client/nodepool.py +358 -213
- clarifai/client/runner.py +58 -0
- clarifai/client/search.py +342 -309
- clarifai/client/user.py +419 -414
- clarifai/client/workflow.py +294 -274
- clarifai/constants/dataset.py +11 -17
- clarifai/constants/model.py +8 -2
- clarifai/datasets/export/inputs_annotations.py +233 -217
- clarifai/datasets/upload/base.py +63 -51
- clarifai/datasets/upload/features.py +43 -38
- clarifai/datasets/upload/image.py +237 -207
- clarifai/datasets/upload/loaders/coco_captions.py +34 -32
- clarifai/datasets/upload/loaders/coco_detection.py +72 -65
- clarifai/datasets/upload/loaders/imagenet_classification.py +57 -53
- clarifai/datasets/upload/loaders/xview_detection.py +274 -132
- clarifai/datasets/upload/multimodal.py +55 -46
- clarifai/datasets/upload/text.py +55 -47
- clarifai/datasets/upload/utils.py +250 -234
- clarifai/errors.py +51 -50
- clarifai/models/api.py +260 -238
- clarifai/modules/css.py +50 -50
- clarifai/modules/pages.py +33 -33
- clarifai/rag/rag.py +312 -288
- clarifai/rag/utils.py +91 -84
- clarifai/runners/models/model_builder.py +906 -802
- clarifai/runners/models/model_class.py +370 -331
- clarifai/runners/models/model_run_locally.py +459 -419
- clarifai/runners/models/model_runner.py +170 -162
- clarifai/runners/models/model_servicer.py +78 -70
- clarifai/runners/server.py +111 -101
- clarifai/runners/utils/code_script.py +225 -187
- clarifai/runners/utils/const.py +4 -1
- clarifai/runners/utils/data_types/__init__.py +12 -0
- clarifai/runners/utils/data_types/data_types.py +598 -0
- clarifai/runners/utils/data_utils.py +387 -440
- clarifai/runners/utils/loader.py +247 -227
- clarifai/runners/utils/method_signatures.py +411 -386
- clarifai/runners/utils/openai_convertor.py +108 -109
- clarifai/runners/utils/serializers.py +175 -179
- clarifai/runners/utils/url_fetcher.py +35 -35
- clarifai/schema/search.py +56 -63
- clarifai/urls/helper.py +125 -102
- clarifai/utils/cli.py +129 -123
- clarifai/utils/config.py +127 -87
- clarifai/utils/constants.py +49 -0
- clarifai/utils/evaluation/helpers.py +503 -466
- clarifai/utils/evaluation/main.py +431 -393
- clarifai/utils/evaluation/testset_annotation_parser.py +154 -144
- clarifai/utils/logging.py +324 -306
- clarifai/utils/misc.py +60 -56
- clarifai/utils/model_train.py +165 -146
- clarifai/utils/protobuf.py +126 -103
- clarifai/versions.py +3 -1
- clarifai/workflows/export.py +48 -50
- clarifai/workflows/utils.py +39 -36
- clarifai/workflows/validate.py +55 -43
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/METADATA +16 -6
- clarifai-11.4.0.dist-info/RECORD +109 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/WHEEL +1 -1
- clarifai/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/__pycache__/errors.cpython-310.pyc +0 -0
- clarifai/__pycache__/errors.cpython-311.pyc +0 -0
- clarifai/__pycache__/versions.cpython-310.pyc +0 -0
- clarifai/__pycache__/versions.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/cli/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/app.cpython-39.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/compute_cluster.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/deployment.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/lister.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/module.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/nodepool.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/user.cpython-311.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/client/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/register.cpython-311.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-310.pyc +0 -0
- clarifai/client/auth/__pycache__/stub.cpython-311.pyc +0 -0
- clarifai/client/cli/__init__.py +0 -0
- clarifai/client/cli/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/base_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/__pycache__/model_cli.cpython-310.pyc +0 -0
- clarifai/client/cli/base_cli.py +0 -88
- clarifai/client/cli/model_cli.py +0 -29
- clarifai/constants/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/dataset.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/input.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/model.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/rag.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-310.pyc +0 -0
- clarifai/constants/__pycache__/workflow.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/export/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-310.pyc +0 -0
- clarifai/datasets/export/__pycache__/inputs_annotations.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/base.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/features.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/image.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/multimodal.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/text.cpython-311.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/datasets/upload/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/coco_detection.cpython-311.pyc +0 -0
- clarifai/datasets/upload/loaders/__pycache__/imagenet_classification.cpython-311.pyc +0 -0
- clarifai/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/modules/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/rag/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-311.pyc +0 -0
- clarifai/rag/__pycache__/rag.cpython-39.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/rag/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/dockerfile_template/Dockerfile.cpu.template +0 -31
- clarifai/runners/dockerfile_template/Dockerfile.cuda.template +0 -42
- clarifai/runners/dockerfile_template/Dockerfile.nim +0 -71
- clarifai/runners/models/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/base_typed_model.cpython-39.pyc +0 -0
- clarifai/runners/models/__pycache__/model_builder.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_class.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310-pytest-7.1.2.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_run_locally.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-310.pyc +0 -0
- clarifai/runners/models/__pycache__/model_runner.cpython-311.pyc +0 -0
- clarifai/runners/models/__pycache__/model_upload.cpython-310.pyc +0 -0
- clarifai/runners/models/base_typed_model.py +0 -238
- clarifai/runners/models/model_class_refract.py +0 -80
- clarifai/runners/models/model_upload.py +0 -607
- clarifai/runners/models/temp.py +0 -25
- clarifai/runners/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/buffered_stream.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/const.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/constants.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_handler.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/data_utils.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/grpc_server.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/health.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/loader.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/logging.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/stream_source.cpython-39.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-310.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-311.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-38.pyc +0 -0
- clarifai/runners/utils/__pycache__/url_fetcher.cpython-39.pyc +0 -0
- clarifai/runners/utils/data_handler.py +0 -231
- clarifai/runners/utils/data_handler_refract.py +0 -213
- clarifai/runners/utils/data_types.py +0 -469
- clarifai/runners/utils/logger.py +0 -0
- clarifai/runners/utils/openai_format.py +0 -87
- clarifai/schema/__pycache__/search.cpython-310.pyc +0 -0
- clarifai/schema/__pycache__/search.cpython-311.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-310.pyc +0 -0
- clarifai/urls/__pycache__/helper.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/cli.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/config.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/constants.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/logging.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/misc.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-310.pyc +0 -0
- clarifai/utils/__pycache__/model_train.cpython-311.pyc +0 -0
- clarifai/utils/__pycache__/protobuf.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/helpers.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-311.pyc +0 -0
- clarifai/utils/evaluation/__pycache__/main.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/__init__.cpython-39.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/export.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/utils.cpython-311.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-310.pyc +0 -0
- clarifai/workflows/__pycache__/validate.cpython-311.pyc +0 -0
- clarifai-11.3.0rc2.dist-info/RECORD +0 -322
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/entry_points.txt +0 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info/licenses}/LICENSE +0 -0
- {clarifai-11.3.0rc2.dist-info → clarifai-11.4.0.dist-info}/top_level.txt +0 -0
clarifai/rag/rag.py
CHANGED
@@ -12,8 +12,12 @@ from clarifai.client.user import User
 from clarifai.client.workflow import Workflow
 from clarifai.constants.rag import MAX_UPLOAD_BATCH_SIZE
 from clarifai.errors import UserError
-from clarifai.rag.utils import (
-[old line 16: content not captured in this diff view]
+from clarifai.rag.utils import (
+    convert_messages_to_str,
+    format_assistant_message,
+    load_documents,
+    split_document,
+)
 from clarifai.utils.constants import CLARIFAI_USER_ID_ENV_VAR
 from clarifai.utils.logging import logger
 from clarifai.utils.misc import get_from_dict_or_env
@@ -22,7 +26,7 @@ DEFAULT_RAG_PROMPT_TEMPLATE = "Context information is below:\n{data.hits}\nGiven

 
 class RAG:
-[old line 25: content not captured in this diff view]
+    """
     RAG is a class for Retrieval Augmented Generation.

     Example:
@@ -30,290 +34,310 @@ class RAG:
     >>> rag_agent = RAG(workflow_url=YOUR_WORKFLOW_URL)
     >>> rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
     """
-  chat_state_id = None
-
-  def __init__(self,
-               workflow_url: str = None,
-               workflow: Workflow = None,
-               base_url: str = "https://api.clarifai.com",
-               pat: str = None,
-               **kwargs):
-    """Initialize an empty or existing RAG.
-    """
-    self.logger = logger
-    if workflow_url is not None and workflow is None:
-      self.logger.info("workflow_url:%s", workflow_url)
-      w = Workflow(workflow_url, base_url=base_url, pat=pat)
-      self._prompt_workflow = w
-      self._app = App(app_id=w.app_id, user_id=w.user_id, base_url=w.base, pat=w.pat)
-    elif workflow_url is None and workflow is not None:
-      self._prompt_workflow = workflow
-      self._app = App(
-          app_id=workflow.app_id,
-          user_id=workflow.user_id,
-          base_url=workflow.base,
-          pat=workflow.pat)
-
-  @classmethod
-  def setup(cls,
-            user_id: str = None,
-            app_url: str = None,
-            llm_url: str = "https://clarifai.com/mistralai/completion/models/mistral-7B-Instruct",
-            base_workflow: str = "Text",
-            workflow_yaml_filename: str = 'prompter_wf.yaml',
-            workflow_id: str = None,
-            base_url: str = "https://api.clarifai.com",
-            pat: str = None,
-            **kwargs):
-    """Creates an app with `Text` as base workflow, create prompt model, create prompt workflow.
-
-    **kwargs: Additional keyword arguments to be passed to rag-promter model.
-      - min_score (float): The minimum score for search hits.
-      - max_results (float): The maximum number of search hits.
-      - prompt_template (str): The prompt template used. Must contain {data.hits} for the search hits and {data.text.raw} for the query string.

-[old lines 75-150: content not captured in this diff view]
+    chat_state_id = None
+
+    def __init__(
+        self,
+        workflow_url: str = None,
+        workflow: Workflow = None,
+        base_url: str = "https://api.clarifai.com",
+        pat: str = None,
+        **kwargs,
+    ):
+        """Initialize an empty or existing RAG."""
+        self.logger = logger
+        if workflow_url is not None and workflow is None:
+            self.logger.info("workflow_url:%s", workflow_url)
+            w = Workflow(workflow_url, base_url=base_url, pat=pat)
+            self._prompt_workflow = w
+            self._app = App(app_id=w.app_id, user_id=w.user_id, base_url=w.base, pat=w.pat)
+        elif workflow_url is None and workflow is not None:
+            self._prompt_workflow = workflow
+            self._app = App(
+                app_id=workflow.app_id,
+                user_id=workflow.user_id,
+                base_url=workflow.base,
+                pat=workflow.pat,
+            )
+
+    @classmethod
+    def setup(
+        cls,
+        user_id: str = None,
+        app_url: str = None,
+        llm_url: str = "https://clarifai.com/mistralai/completion/models/mistral-7B-Instruct",
+        base_workflow: str = "Text",
+        workflow_yaml_filename: str = 'prompter_wf.yaml',
+        workflow_id: str = None,
+        base_url: str = "https://api.clarifai.com",
+        pat: str = None,
+        **kwargs,
+    ):
+        """Creates an app with `Text` as base workflow, create prompt model, create prompt workflow.
+
+        **kwargs: Additional keyword arguments to be passed to rag-promter model.
+            - min_score (float): The minimum score for search hits.
+            - max_results (float): The maximum number of search hits.
+            - prompt_template (str): The prompt template used. Must contain {data.hits} for the search hits and {data.text.raw} for the query string.
+
+        Example:
+        >>> from clarifai.rag import RAG
+        >>> rag_agent = RAG.setup(user_id=YOUR_USER_ID)
+        >>> rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
+
+        Or if you already have an existing app with ingested data:
+        >>> rag_agent = RAG.setup(app_url=YOUR_APP_URL)
+        >>> rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
+        """
+        if not app_url:
+            try:
+                user_id = get_from_dict_or_env(
+                    key="user_id", env_key=CLARIFAI_USER_ID_ENV_VAR, **kwargs
+                )
+            except Exception:
+                pass
+
+        now_ts = uuid.uuid4().hex[:10]
+        if user_id and not app_url:
+            user = User(user_id=user_id, base_url=base_url, pat=pat)
+            ## Create an App
+            app_id = f"rag_app_{now_ts}"
+            app = user.create_app(app_id=app_id, base_workflow=base_workflow)
+
+        if not user_id and app_url:
+            app = App(url=app_url, pat=pat)
+            uid = app_url.split(".com/")[1].split("/")[0]
+            user = User(user_id=uid, base_url=base_url, pat=pat)
+
+        if user_id and app_url:
+            raise UserError("Must provide one of user_id or app_url, not both.")
+
+        if not user_id and not app_url:
+            raise UserError(
+                "user_id or app_url must be provided. The user_id can be found at https://clarifai.com/settings."
+            )
+
+        llm = Model(url=llm_url, pat=pat)
+
+        min_score = kwargs.get("min_score", 0.95)
+        max_results = kwargs.get("max_results", 5)
+        prompt_template = kwargs.get("prompt_template", DEFAULT_RAG_PROMPT_TEMPLATE)
+        params = Struct()
+        params.update(
+            {
+                "min_score": min_score,
+                "max_results": max_results,
+                "prompt_template": prompt_template,
+            }
+        )
+        prompter_model_params = {"params": params}
+
+        ## Create rag-prompter model and version
+        model_id = (
+            f"prompter-{workflow_id}-{now_ts}"
+            if workflow_id is not None
+            else f"rag-prompter-{now_ts}"
+        )
+        prompter_model = app.create_model(model_id=model_id, model_type_id="rag-prompter")
+        prompter_model = prompter_model.create_version(output_info=prompter_model_params)
+
+        ## Generate a tmp yaml file for workflow creation
+        workflow_id = f"rag-wf-{now_ts}" if workflow_id is None else workflow_id
+        workflow_dict = {
+            "workflow": {
+                "id": workflow_id,
+                "nodes": [
+                    {
+                        "id": "rag-prompter",
+                        "model": {
+                            "model_id": prompter_model.id,
+                            "model_version_id": prompter_model.model_version.id,
+                        },
+                    },
+                    {
+                        "id": "llm",
+                        "model": {
+                            "model_id": llm.id,
+                            "user_id": llm.user_id,
+                            "app_id": llm.app_id,
+                        },
+                        "node_inputs": [{"node_id": "rag-prompter"}],
+                    },
+                ],
+            }
         }
-[old lines 152-319: content not captured in this diff view]
+        with open(workflow_yaml_filename, 'w') as out_file:
+            yaml.dump(workflow_dict, out_file, default_flow_style=False)
+
+        ## Create prompt workflow
+        wf = app.create_workflow(config_filepath=workflow_yaml_filename)
+        del user, llm, prompter_model, prompter_model_params
+        return cls(workflow=wf)
+
+    def upload(
+        self,
+        file_path: str = None,
+        folder_path: str = None,
+        url: str = None,
+        batch_size: int = 128,
+        chunk_size: int = 1024,
+        chunk_overlap: int = 200,
+        dataset_id: str = None,
+        metadata: dict = None,
+        **kwargs,
+    ) -> None:
+        """Uploads documents to the app.
+        - Read from a local directory or public url or local filename.
+        - Parse the document(s) into chunks.
+        - Ingest chunks into the app with metadata.
+
+        Args:
+            file_path str: File path to the document.
+            folder_path str: Folder path to the documents.
+            url str: Public url to the document.
+            batch_size int: Batch size for uploading.
+            chunk_size int: Chunk size for splitting the document.
+            chunk_overlap int: The token overlap of each chunk when splitting.
+            **kwargs: Additional arguments for the SentenceSplitter. Refer https://docs.llamaindex.ai/en/stable/api/llama_index.node_parser.SentenceSplitter.html
+
+        Example:
+        >>> from clarifai.rag import RAG
+        >>> rag_agent = RAG.setup(user_id=YOUR_USER_ID)
+        >>> rag_agent.upload(folder_path = "~/work/docs")
+        >>> rag_agent.upload(file_path = "~/work/docs/manual.pdf")
+        >>> rag_agent.chat(messages=[{"role":"human", "content":"What is Clarifai"}])
+        """
+        # set batch size
+        if batch_size > MAX_UPLOAD_BATCH_SIZE:
+            raise ValueError(f"batch_size cannot be greater than {MAX_UPLOAD_BATCH_SIZE}")
+
+        # check if only one of file_path, folder_path, or url is specified
+        if file_path and (folder_path or url):
+            raise ValueError("Only one of file_path, folder_path, or url can be specified.")
+        if folder_path and (file_path or url):
+            raise ValueError("Only one of file_path, folder_path, or url can be specified.")
+        if url and (file_path or folder_path):
+            raise ValueError("Only one of file_path, folder_path, or url can be specified.")
+
+        # loading documents
+        documents = load_documents(file_path=file_path, folder_path=folder_path, url=url)
+
+        # splitting documents into chunks
+        text_chunks = []
+        metadata_list = []
+
+        # iterate through documents
+        for doc in documents:
+            doc_i = 0
+            cur_text_chunks = split_document(
+                text=doc.text, chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs
+            )
+            text_chunks.extend(cur_text_chunks)
+            metadata_list.extend([doc.metadata for _ in range(len(cur_text_chunks))])
+            # if batch size is reached, upload the batch
+            if len(text_chunks) > batch_size:
+                for idx in range(0, len(text_chunks), batch_size):
+                    if idx + batch_size > len(text_chunks):
+                        continue
+                    batch_texts = text_chunks[0:batch_size]
+                    batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
+                    # metadata
+                    batch_metadatas = metadata_list[0:batch_size]
+                    meta_list = []
+                    for meta in batch_metadatas:
+                        meta_struct = Struct()
+                        meta_struct.update(meta)
+                        meta_struct.update({"doc_chunk_no": doc_i})
+                        if metadata and isinstance(metadata, dict):
+                            meta_struct.update(metadata)
+                        meta_list.append(meta_struct)
+                        doc_i += 1
+                    del batch_metadatas
+                    # creating input proto
+                    input_batch = [
+                        self._app.inputs().get_text_input(
+                            input_id=batch_ids[i],
+                            raw_text=text,
+                            dataset_id=dataset_id,
+                            metadata=meta_list[i],
+                        )
+                        for i, text in enumerate(batch_texts)
+                    ]
+                    # uploading input with metadata
+                    self._app.inputs().upload_inputs(inputs=input_batch)
+                    # delete uploaded chunks
+                    del text_chunks[0:batch_size]
+                    del metadata_list[0:batch_size]
+
+        # uploading the remaining chunks
+        if len(text_chunks) > 0:
+            batch_size = len(text_chunks)
+            batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
+            # metadata
+            batch_metadatas = metadata_list[0:batch_size]
+            meta_list = []
+            for meta in batch_metadatas:
+                meta_struct = Struct()
+                meta_struct.update(meta)
+                meta_struct.update({"doc_chunk_no": doc_i})
+                if metadata and isinstance(metadata, dict):
+                    meta_struct.update(metadata)
+                meta_list.append(meta_struct)
+                doc_i += 1
+            del batch_metadatas
+            # creating input proto
+            input_batch = [
+                self._app.inputs().get_text_input(
+                    input_id=batch_ids[i],
+                    raw_text=text,
+                    dataset_id=dataset_id,
+                    metadata=meta_list[i],
+                )
+                for i, text in enumerate(text_chunks)
+            ]
+            # uploading input with metadata
+            self._app.inputs().upload_inputs(inputs=input_batch)
+            del text_chunks
+            del metadata_list
+
+    def chat(self, messages: List[dict], client_manage_state: bool = False) -> List[dict]:
+        """Chat interface in OpenAI API format.
+
+        Args:
+            messages List[dict]: A list of dictionary in the following format:
+            ```
+            [
+                {"role": "user", "content": "Hello there."},
+                {"role": "assistant", "content": "Hi, I'm Claude. How can I help you?"},
+                {"role": "user", "content": "Can you explain LLMs in plain English?"},
+            ]
+            ```
+            client_manage_state (bool): Whether the client will handle chat state management. Default is false.
+
+        This will pass back the workflow state ID for the server to store chat state.
+        """
+        if client_manage_state:
+            single_prompt = convert_messages_to_str(messages)
+            input_proto = Inputs._get_proto("", "", text_pb=resources_pb2.Text(raw=single_prompt))
+            response = self._prompt_workflow.predict([input_proto])
+            messages.append(
+                format_assistant_message(response.results[0].outputs[-1].data.text.raw)
+            )
+            return messages
+
+        # server-side state management
+        message = messages[-1].get("content", "")
+        if len(message) == 0:
+            raise UserError("Empty message supplied.")
+
+        # get chat state id
+        chat_state_id = "init" if self.chat_state_id is None else self.chat_state_id
+
+        # call predict
+        input_proto = Inputs._get_proto("", "", text_pb=resources_pb2.Text(raw=message))
+        response = self._prompt_workflow.predict([input_proto], workflow_state_id=chat_state_id)
+
+        # store chat state id
+        self.chat_state_id = response.workflow_state.id
+        return [format_assistant_message(response.results[0].outputs[-1].data.text.raw)]
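For orientation, here is a minimal usage sketch assembled from the docstring examples in the new rag.py above. "YOUR_USER_ID" and the document path are placeholders, and a valid personal access token is assumed to be available to the client (e.g. via the CLARIFAI_PAT environment variable):

```python
# Minimal sketch based on the docstring examples in clarifai 11.4.0's rag.py.
# "YOUR_USER_ID" and the file path are placeholders; authentication (a PAT)
# is assumed to be configured in the environment.
from clarifai.rag import RAG

# RAG.setup creates an app with a Text base workflow, a rag-prompter model,
# and a prompt workflow wired to the chosen LLM.
rag_agent = RAG.setup(user_id="YOUR_USER_ID")

# Ingest a local document (split into chunks and uploaded as text inputs with
# metadata), then ask a question against the ingested data.
rag_agent.upload(file_path="~/work/docs/manual.pdf")
print(rag_agent.chat(messages=[{"role": "human", "content": "What is Clarifai?"}]))
```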