clarifai 9.11.0__py3-none-any.whl → 10.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clarifai/client/app.py +1 -1
- clarifai/client/input.py +34 -1
- clarifai/client/search.py +15 -9
- clarifai/client/workflow.py +6 -2
- clarifai/constants/rag.py +1 -0
- clarifai/models/model_serving/README.md +1 -1
- clarifai/models/model_serving/models/default_test.py +3 -0
- clarifai/rag/__init__.py +3 -0
- clarifai/rag/rag.py +261 -0
- clarifai/rag/utils.py +102 -0
- clarifai/schema/search.py +3 -0
- clarifai/urls/helper.py +17 -0
- clarifai/versions.py +1 -1
- {clarifai-9.11.0.dist-info → clarifai-10.0.0.dist-info}/METADATA +19 -6
- clarifai-10.0.0.dist-info/RECORD +103 -0
- clarifai/models/model_serving/examples/README.md +0 -7
- clarifai/models/model_serving/examples/image_classification/README.md +0 -12
- clarifai/models/model_serving/examples/image_classification/age_vit/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/image_classification/age_vit/1/inference.py +0 -64
- clarifai/models/model_serving/examples/image_classification/age_vit/1/model.py +0 -74
- clarifai/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/README.md +0 -11
- clarifai/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/config.json +0 -42
- clarifai/models/model_serving/examples/image_classification/age_vit/1/vit-age-classifier/preprocessor_config.json +0 -15
- clarifai/models/model_serving/examples/image_classification/age_vit/config.pbtxt +0 -23
- clarifai/models/model_serving/examples/image_classification/age_vit/labels.txt +0 -9
- clarifai/models/model_serving/examples/image_classification/age_vit/requirements.txt +0 -7
- clarifai/models/model_serving/examples/multimodal_embedder/README.md +0 -12
- clarifai/models/model_serving/examples/multimodal_embedder/clip/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/multimodal_embedder/clip/1/inference.py +0 -66
- clarifai/models/model_serving/examples/multimodal_embedder/clip/1/model.py +0 -74
- clarifai/models/model_serving/examples/multimodal_embedder/clip/1/test.py +0 -64
- clarifai/models/model_serving/examples/multimodal_embedder/clip/config.pbtxt +0 -29
- clarifai/models/model_serving/examples/multimodal_embedder/clip/requirements.txt +0 -4
- clarifai/models/model_serving/examples/text_classification/README.md +0 -12
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/inference.py +0 -62
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/model.py +0 -74
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/README.md +0 -12
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/config.json +0 -34
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/1/twitter-xlm-roberta-base-sentiment/special_tokens_map.json +0 -1
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/config.pbtxt +0 -21
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/labels.txt +0 -3
- clarifai/models/model_serving/examples/text_classification/xlm-roberta/requirements.txt +0 -7
- clarifai/models/model_serving/examples/text_embedding/README.md +0 -12
- clarifai/models/model_serving/examples/text_embedding/instructor-xl/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/text_embedding/instructor-xl/1/inference.py +0 -63
- clarifai/models/model_serving/examples/text_embedding/instructor-xl/1/model.py +0 -74
- clarifai/models/model_serving/examples/text_embedding/instructor-xl/1/test.py +0 -64
- clarifai/models/model_serving/examples/text_embedding/instructor-xl/config.pbtxt +0 -20
- clarifai/models/model_serving/examples/text_embedding/instructor-xl/requirements.txt +0 -9
- clarifai/models/model_serving/examples/text_to_image/README.md +0 -10
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/1/inference.py +0 -58
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/1/model.py +0 -74
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/config.pbtxt +0 -22
- clarifai/models/model_serving/examples/text_to_image/sd-v1.5/requirements.txt +0 -6
- clarifai/models/model_serving/examples/text_to_text/README.md +0 -12
- clarifai/models/model_serving/examples/text_to_text/bart-summarize/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/text_to_text/bart-summarize/1/inference.py +0 -59
- clarifai/models/model_serving/examples/text_to_text/bart-summarize/1/model.py +0 -74
- clarifai/models/model_serving/examples/text_to_text/bart-summarize/config.pbtxt +0 -20
- clarifai/models/model_serving/examples/text_to_text/bart-summarize/requirements.txt +0 -4
- clarifai/models/model_serving/examples/visual_detection/README.md +0 -15
- clarifai/models/model_serving/examples/visual_detection/yolov5x/1/inference.py +0 -81
- clarifai/models/model_serving/examples/visual_detection/yolov5x/1/model.py +0 -74
- clarifai/models/model_serving/examples/visual_detection/yolov5x/config.pbtxt +0 -36
- clarifai/models/model_serving/examples/visual_detection/yolov5x/labels.txt +0 -80
- clarifai/models/model_serving/examples/visual_detection/yolov5x/requirements.txt +0 -12
- clarifai/models/model_serving/examples/visual_embedding/README.md +0 -12
- clarifai/models/model_serving/examples/visual_embedding/vit-base/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/visual_embedding/vit-base/1/inference.py +0 -56
- clarifai/models/model_serving/examples/visual_embedding/vit-base/1/model.py +0 -74
- clarifai/models/model_serving/examples/visual_embedding/vit-base/config.pbtxt +0 -22
- clarifai/models/model_serving/examples/visual_embedding/vit-base/requirements.txt +0 -5
- clarifai/models/model_serving/examples/visual_segmentation/README.md +0 -12
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/1/inference.py +0 -62
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/1/model.py +0 -74
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/config.pbtxt +0 -24
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/labels.txt +0 -18
- clarifai/models/model_serving/examples/visual_segmentation/segformer-b2/requirements.txt +0 -5
- clarifai/models/model_serving/examples/vllm/Readme.md +0 -12
- clarifai/models/model_serving/examples/vllm/example/1/__init__.py +0 -0
- clarifai/models/model_serving/examples/vllm/example/1/inference.py +0 -56
- clarifai/models/model_serving/examples/vllm/example/1/model.py +0 -74
- clarifai/models/model_serving/examples/vllm/example/1/test.py +0 -64
- clarifai/models/model_serving/examples/vllm/example/1/weights/keep +0 -0
- clarifai/models/model_serving/examples/vllm/example/config.pbtxt +0 -20
- clarifai/models/model_serving/examples/vllm/example/requirements.txt +0 -5
- clarifai-9.11.0.dist-info/RECORD +0 -173
- {clarifai-9.11.0.dist-info → clarifai-10.0.0.dist-info}/LICENSE +0 -0
- {clarifai-9.11.0.dist-info → clarifai-10.0.0.dist-info}/WHEEL +0 -0
- {clarifai-9.11.0.dist-info → clarifai-10.0.0.dist-info}/entry_points.txt +0 -0
- {clarifai-9.11.0.dist-info → clarifai-10.0.0.dist-info}/top_level.txt +0 -0
clarifai/client/app.py
CHANGED
@@ -47,7 +47,7 @@ class App(Lister, BaseClient):
|
|
47
47
|
if url and app_id:
|
48
48
|
raise UserError("You can only specify one of url or app_id.")
|
49
49
|
if url:
|
50
|
-
user_id, app_id
|
50
|
+
user_id, app_id = ClarifaiUrlHelper.split_clarifai_app_url(url)
|
51
51
|
kwargs = {'user_id': user_id}
|
52
52
|
self.kwargs = {**kwargs, 'id': app_id}
|
53
53
|
self.app_info = resources_pb2.App(**self.kwargs)
|
clarifai/client/input.py
CHANGED
@@ -7,11 +7,13 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
7
|
from multiprocessing import cpu_count
|
8
8
|
from typing import Generator, List, Union
|
9
9
|
|
10
|
+
import requests
|
10
11
|
from clarifai_grpc.grpc.api import resources_pb2, service_pb2 # noqa: F401
|
11
12
|
from clarifai_grpc.grpc.api.resources_pb2 import Annotation, Audio, Image, Input, Text, Video
|
12
13
|
from clarifai_grpc.grpc.api.status import status_code_pb2, status_pb2
|
13
14
|
from google.protobuf.json_format import MessageToDict
|
14
15
|
from google.protobuf.struct_pb2 import Struct
|
16
|
+
from requests.adapters import HTTPAdapter, Retry
|
15
17
|
from tqdm import tqdm
|
16
18
|
|
17
19
|
from clarifai.client.base import BaseClient
|
@@ -306,7 +308,7 @@ class Inputs(Lister, BaseClient):
|
|
306
308
|
>>> from clarifai.client.input import Inputs
|
307
309
|
>>> input_protos = Inputs.get_multimodal_input(input_id = 'demo', raw_text = 'What time of day is it?', image_url='https://samples.clarifai.com/metro-north.jpg')
|
308
310
|
"""
|
309
|
-
if (image_bytes and image_url) or (not
|
311
|
+
if (image_bytes and image_url) or (not image_bytes and not image_url):
|
310
312
|
return UserError("Please supply only one of image_bytes or image_url, and not both.")
|
311
313
|
if (text_bytes and raw_text) or (not text_bytes and not raw_text):
|
312
314
|
return UserError("Please supply only one of text_bytes or raw_text, and not both.")
|
@@ -729,6 +731,37 @@ class Inputs(Lister, BaseClient):
|
|
729
731
|
raise Exception(response.status)
|
730
732
|
self.logger.info("\nInputs Deleted\n%s", response.status)
|
731
733
|
|
734
|
+
def download_inputs(self, inputs: List[Input]) -> List[bytes]:
|
735
|
+
"""Download list of input objects from the app.
|
736
|
+
|
737
|
+
Args:
|
738
|
+
input_ids (Input): List of input objects to download.
|
739
|
+
|
740
|
+
Example:
|
741
|
+
>>> from clarifai.client.user import User
|
742
|
+
>>> input_obj = User(user_id="user_id").app(app_id="app_id").inputs()
|
743
|
+
>>> input_obj.download_inputs(list(input_obj.list_inputs()))
|
744
|
+
"""
|
745
|
+
if not isinstance(inputs, list):
|
746
|
+
raise UserError("input_ids must be a list of input ids")
|
747
|
+
final_inputs = []
|
748
|
+
#initiate session
|
749
|
+
session = requests.Session()
|
750
|
+
retries = Retry(total=3, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
|
751
|
+
session.mount('https://', HTTPAdapter(max_retries=retries))
|
752
|
+
session.headers.update({'Authorization': self.metadata[0][1]})
|
753
|
+
# download inputs
|
754
|
+
data_types = ['image', 'video', 'audio', 'text']
|
755
|
+
for input in inputs:
|
756
|
+
for data_type in data_types:
|
757
|
+
url = getattr(input.data, data_type).url
|
758
|
+
if url:
|
759
|
+
response = session.get(url, stream=True)
|
760
|
+
if response.status_code == 200:
|
761
|
+
final_inputs.append(response.content)
|
762
|
+
|
763
|
+
return final_inputs
|
764
|
+
|
732
765
|
def list_inputs(self,
|
733
766
|
dataset_id: str = None,
|
734
767
|
page_no: int = None,
|
clarifai/client/search.py
CHANGED
@@ -18,8 +18,8 @@ from clarifai.schema.search import get_schema
|
|
18
18
|
class Search(Lister, BaseClient):
|
19
19
|
|
20
20
|
def __init__(self,
|
21
|
-
user_id,
|
22
|
-
app_id,
|
21
|
+
user_id: str,
|
22
|
+
app_id: str,
|
23
23
|
top_k: int = DEFAULT_TOP_K,
|
24
24
|
metric: str = DEFAULT_SEARCH_METRIC,
|
25
25
|
base_url: str = "https://api.clarifai.com",
|
@@ -208,7 +208,9 @@ class Search(Lister, BaseClient):
|
|
208
208
|
>>> search = Search(user_id='user_id', app_id='app_id', top_k=1, metric='cosine')
|
209
209
|
>>> res = search.query(ranks=[{'image_url': 'https://samples.clarifai.com/dog.tiff'}])
|
210
210
|
|
211
|
-
Note:
|
211
|
+
Note:
|
212
|
+
For schema of rank and filter, please refer to [schema](https://github.com/Clarifai/clarifai-python/tree/master/clarifai/schema/search.py).
|
213
|
+
For more detailed search examples, please refer to [examples](https://github.com/Clarifai/examples/tree/main/search).
|
212
214
|
"""
|
213
215
|
try:
|
214
216
|
self.rank_filter_schema.validate(ranks)
|
@@ -216,7 +218,13 @@ class Search(Lister, BaseClient):
|
|
216
218
|
except SchemaError as err:
|
217
219
|
raise UserError(f"Invalid rank or filter input: {err}")
|
218
220
|
|
219
|
-
|
221
|
+
# For each rank, create a Rank proto message
|
222
|
+
rank_annot_proto = []
|
223
|
+
for rank_dict in ranks:
|
224
|
+
rank_annot_proto.append(self._get_annot_proto(**rank_dict))
|
225
|
+
all_ranks = [resources_pb2.Rank(annotation=rank_annot) for rank_annot in rank_annot_proto]
|
226
|
+
|
227
|
+
# Calls PostInputsSearches for annotation ranks, input filters
|
220
228
|
if any(["input" in k for k in filters[0].keys()]):
|
221
229
|
filters_input_proto = []
|
222
230
|
for filter_dict in filters:
|
@@ -228,20 +236,18 @@ class Search(Lister, BaseClient):
|
|
228
236
|
user_app_id=self.user_app_id,
|
229
237
|
searches=[
|
230
238
|
resources_pb2.Search(
|
231
|
-
query=resources_pb2.Query(filters=all_filters),
|
239
|
+
query=resources_pb2.Query(ranks=all_ranks, filters=all_filters),
|
240
|
+
metric=self.metric_distance)
|
232
241
|
])
|
233
242
|
|
234
243
|
return self.list_all_pages_generator(self.STUB.PostInputsSearches,
|
235
244
|
service_pb2.PostInputsSearchesRequest, request_data)
|
236
245
|
|
237
246
|
# Calls PostAnnotationsSearches for annotation ranks, filters
|
238
|
-
|
239
|
-
for rank_dict in ranks:
|
240
|
-
rank_annot_proto.append(self._get_annot_proto(**rank_dict))
|
247
|
+
filters_annot_proto = []
|
241
248
|
for filter_dict in filters:
|
242
249
|
filters_annot_proto.append(self._get_annot_proto(**filter_dict))
|
243
250
|
|
244
|
-
all_ranks = [resources_pb2.Rank(annotation=rank_annot) for rank_annot in rank_annot_proto]
|
245
251
|
all_filters = [
|
246
252
|
resources_pb2.Filter(annotation=filter_annot) for filter_annot in filters_annot_proto
|
247
253
|
]
|
clarifai/client/workflow.py
CHANGED
@@ -58,11 +58,12 @@ class Workflow(Lister, BaseClient):
|
|
58
58
|
BaseClient.__init__(self, user_id=self.user_id, app_id=self.app_id, base=base_url, pat=pat)
|
59
59
|
Lister.__init__(self)
|
60
60
|
|
61
|
-
def predict(self, inputs: List[Input]):
|
61
|
+
def predict(self, inputs: List[Input], workflow_state_id: str = None):
|
62
62
|
"""Predicts the workflow based on the given inputs.
|
63
63
|
|
64
64
|
Args:
|
65
65
|
inputs (list[Input]): The inputs to predict.
|
66
|
+
workflow_state_id (str): key for the workflow state cache that stores the workflow node predictions.
|
66
67
|
"""
|
67
68
|
if len(inputs) > MAX_WORKFLOW_PREDICT_INPUTS:
|
68
69
|
raise UserError(f"Too many inputs. Max is {MAX_WORKFLOW_PREDICT_INPUTS}."
|
@@ -74,6 +75,9 @@ class Workflow(Lister, BaseClient):
|
|
74
75
|
inputs=inputs,
|
75
76
|
output_config=self.output_config)
|
76
77
|
|
78
|
+
if workflow_state_id:
|
79
|
+
request.workflow_state.id = workflow_state_id
|
80
|
+
|
77
81
|
start_time = time.time()
|
78
82
|
backoff_iterator = BackoffIterator()
|
79
83
|
|
@@ -81,7 +85,7 @@ class Workflow(Lister, BaseClient):
|
|
81
85
|
response = self._grpc_request(self.STUB.PostWorkflowResults, request)
|
82
86
|
|
83
87
|
if response.status.code == status_code_pb2.MODEL_DEPLOYING and \
|
84
|
-
time.time() - start_time < 60:
|
88
|
+
time.time() - start_time < 60*10: # 10 minutes
|
85
89
|
self.logger.info(f"{self.id} Workflow is still deploying, please wait...")
|
86
90
|
time.sleep(next(backoff_iterator))
|
87
91
|
continue
|
clarifai/constants/rag.py
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
MAX_UPLOAD_BATCH_SIZE = 128
|
clarifai/models/model_serving/README.md
CHANGED
@@ -151,7 +151,7 @@ Additional methods can be added to this script's `Infer` class by the user as de
|
|
151
151
|
- [Model types docs](docs/model_types.md)
|
152
152
|
- [Model Output types docs](docs/output.md)
|
153
153
|
- [Dependencies](docs/dependencies.md)
|
154
|
-
- [Examples](examples
|
154
|
+
- [Examples](https://github.com/Clarifai/examples)
|
155
155
|
- [Custom Configs](docs/custom_config.md/)
|
156
156
|
|
157
157
|
## Prerequisites
|
clarifai/models/model_serving/models/default_test.py
CHANGED
@@ -67,6 +67,9 @@ class DefaultTestInferenceModel(unittest.TestCase):
|
|
67
67
|
is_instance_kind_gpu: bool = True,
|
68
68
|
inference_parameters: Union[str, Dict[str, Any]] = ""):
|
69
69
|
import sys
|
70
|
+
#
|
71
|
+
if 'inference' in sys.modules:
|
72
|
+
del sys.modules['inference']
|
70
73
|
sys.path.append(repo_version_dir)
|
71
74
|
self.model_type = model_type
|
72
75
|
self.is_instance_kind_gpu = is_instance_kind_gpu
|
clarifai/rag/__init__.py
ADDED
clarifai/rag/rag.py
ADDED
@@ -0,0 +1,261 @@
|
|
1
|
+
import uuid
|
2
|
+
from datetime import datetime
|
3
|
+
from typing import List
|
4
|
+
|
5
|
+
import yaml
|
6
|
+
from clarifai_grpc.grpc.api import resources_pb2 # noqa: F401
|
7
|
+
from google.protobuf.struct_pb2 import Struct
|
8
|
+
|
9
|
+
from clarifai.client.app import App
|
10
|
+
from clarifai.client.input import Inputs
|
11
|
+
from clarifai.client.model import Model
|
12
|
+
from clarifai.client.user import User
|
13
|
+
from clarifai.client.workflow import Workflow
|
14
|
+
from clarifai.constants.rag import MAX_UPLOAD_BATCH_SIZE
|
15
|
+
from clarifai.errors import UserError
|
16
|
+
from clarifai.rag.utils import (convert_messages_to_str, format_assistant_message, load_documents,
|
17
|
+
split_document)
|
18
|
+
from clarifai.utils.logging import get_logger
|
19
|
+
|
20
|
+
|
21
|
+
class RAG:
|
22
|
+
"""
|
23
|
+
RAG is a class for Retrieval Augmented Generation.
|
24
|
+
|
25
|
+
Example:
|
26
|
+
>>> from clarifai.rag import RAG
|
27
|
+
>>> rag_agent = RAG()
|
28
|
+
"""
|
29
|
+
chat_state_id = None
|
30
|
+
|
31
|
+
def __init__(self,
|
32
|
+
workflow_url: str = None,
|
33
|
+
workflow: Workflow = None,
|
34
|
+
base_url: str = "https://api.clarifai.com",
|
35
|
+
pat: str = None,
|
36
|
+
**kwargs):
|
37
|
+
"""Initialize an empty or existing RAG.
|
38
|
+
"""
|
39
|
+
self.logger = get_logger(logger_level="INFO", name=__name__)
|
40
|
+
if workflow_url is not None and workflow is None:
|
41
|
+
self.logger.info("workflow_url:%s", workflow_url)
|
42
|
+
w = Workflow(workflow_url, base_url=base_url, pat=pat)
|
43
|
+
self._prompt_workflow = w
|
44
|
+
self._app = App(app_id=w.app_id, base_url=w.base, pat=w.pat)
|
45
|
+
elif workflow_url is None and workflow is not None:
|
46
|
+
self._prompt_workflow = workflow
|
47
|
+
self._app = App(app_id=workflow.app_id, base_url=workflow.base, pat=workflow.pat)
|
48
|
+
|
49
|
+
@classmethod
|
50
|
+
def setup(cls,
|
51
|
+
user_id: str = None,
|
52
|
+
llm_url: str = "https://clarifai.com/mistralai/completion/models/mistral-7B-Instruct",
|
53
|
+
base_workflow: str = "Text",
|
54
|
+
workflow_yaml_filename: str = 'prompter_wf.yaml',
|
55
|
+
base_url: str = "https://api.clarifai.com",
|
56
|
+
pat: str = None,
|
57
|
+
**kwargs):
|
58
|
+
"""Creates an app with `Text` as base workflow, create prompt model, create prompt workflow.
|
59
|
+
|
60
|
+
Example:
|
61
|
+
>>> from clarifai.rag import RAG
|
62
|
+
>>> rag_agent = RAG.setup()
|
63
|
+
"""
|
64
|
+
user = User(user_id=user_id, base_url=base_url, pat=pat)
|
65
|
+
llm = Model(llm_url)
|
66
|
+
|
67
|
+
params = Struct()
|
68
|
+
params.update({
|
69
|
+
"prompt_template":
|
70
|
+
"Context information is below:\n{data.hits}\nGiven the context information and not prior knowledge, answer the query.\nQuery: {data.text.raw}\nAnswer: "
|
71
|
+
})
|
72
|
+
prompter_model_params = {"params": params}
|
73
|
+
|
74
|
+
## Create an App
|
75
|
+
now_ts = str(int(datetime.now().timestamp()))
|
76
|
+
app_id = f"rag_app_{now_ts}"
|
77
|
+
app = user.create_app(app_id=app_id, base_workflow=base_workflow)
|
78
|
+
|
79
|
+
## Create rag-prompter model and version
|
80
|
+
prompter_model = app.create_model(
|
81
|
+
model_id=f"rag_prompter_{now_ts}", model_type_id="rag-prompter")
|
82
|
+
prompter_model = prompter_model.create_version(output_info=prompter_model_params)
|
83
|
+
|
84
|
+
## Generate a tmp yaml file for workflow creation
|
85
|
+
workflow_id = f"rag-wf-{now_ts}"
|
86
|
+
workflow_dict = {
|
87
|
+
"workflow": {
|
88
|
+
"id":
|
89
|
+
workflow_id,
|
90
|
+
"nodes": [{
|
91
|
+
"id": "rag-prompter",
|
92
|
+
"model": {
|
93
|
+
"model_id": prompter_model.id,
|
94
|
+
"model_version_id": prompter_model.model_version.id
|
95
|
+
}
|
96
|
+
}, {
|
97
|
+
"id": "llm",
|
98
|
+
"model": {
|
99
|
+
"model_id": llm.id,
|
100
|
+
"user_id": llm.user_id,
|
101
|
+
"app_id": llm.app_id
|
102
|
+
},
|
103
|
+
"node_inputs": [{
|
104
|
+
"node_id": "rag-prompter"
|
105
|
+
}]
|
106
|
+
}]
|
107
|
+
}
|
108
|
+
}
|
109
|
+
with open(workflow_yaml_filename, 'w') as out_file:
|
110
|
+
yaml.dump(workflow_dict, out_file, default_flow_style=False)
|
111
|
+
|
112
|
+
## Create prompt workflow
|
113
|
+
wf = app.create_workflow(config_filepath=workflow_yaml_filename)
|
114
|
+
del user, llm, prompter_model, prompter_model_params
|
115
|
+
return cls(workflow=wf)
|
116
|
+
|
117
|
+
def upload(self,
|
118
|
+
file_path: str = None,
|
119
|
+
folder_path: str = None,
|
120
|
+
url: str = None,
|
121
|
+
batch_size: int = 128,
|
122
|
+
chunk_size: int = 1024,
|
123
|
+
chunk_overlap: int = 200,
|
124
|
+
**kwargs) -> None:
|
125
|
+
"""Uploads documents to the app.
|
126
|
+
- Read from a local directory or public url or local filename.
|
127
|
+
- Parse the document(s) into chunks.
|
128
|
+
- Ingest chunks into the app with metadata.
|
129
|
+
|
130
|
+
Args:
|
131
|
+
file_path str: File path to the document.
|
132
|
+
folder_path str: Folder path to the documents.
|
133
|
+
url str: Public url to the document.
|
134
|
+
batch_size int: Batch size for uploading.
|
135
|
+
chunk_size int: Chunk size for splitting the document.
|
136
|
+
chunk_overlap int: The token overlap of each chunk when splitting.
|
137
|
+
**kwargs: Additional arguments for the SentenceSplitter. Refer https://docs.llamaindex.ai/en/stable/api/llama_index.node_parser.SentenceSplitter.html
|
138
|
+
|
139
|
+
Example:
|
140
|
+
>>> from clarifai.rag import RAG
|
141
|
+
>>> rag_agent = RAG.setup()
|
142
|
+
>>> rag_agent.upload(folder_path = "~/work/docs")
|
143
|
+
>>> rag_agent.upload(file_path = "~/work/docs/manual.pdf")
|
144
|
+
"""
|
145
|
+
#set batch size
|
146
|
+
if batch_size > MAX_UPLOAD_BATCH_SIZE:
|
147
|
+
raise ValueError(f"batch_size cannot be greater than {MAX_UPLOAD_BATCH_SIZE}")
|
148
|
+
|
149
|
+
#check if only one of file_path, folder_path, or url is specified
|
150
|
+
if file_path and (folder_path or url):
|
151
|
+
raise ValueError("Only one of file_path, folder_path, or url can be specified.")
|
152
|
+
if folder_path and (file_path or url):
|
153
|
+
raise ValueError("Only one of file_path, folder_path, or url can be specified.")
|
154
|
+
if url and (file_path or folder_path):
|
155
|
+
raise ValueError("Only one of file_path, folder_path, or url can be specified.")
|
156
|
+
|
157
|
+
#loading documents
|
158
|
+
documents = load_documents(file_path=file_path, folder_path=folder_path, url=url)
|
159
|
+
|
160
|
+
#splitting documents into chunks
|
161
|
+
text_chunks = []
|
162
|
+
metadata = []
|
163
|
+
|
164
|
+
#iterate through documents
|
165
|
+
for doc in documents:
|
166
|
+
cur_text_chunks = split_document(
|
167
|
+
text=doc.text, chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
|
168
|
+
text_chunks.extend(cur_text_chunks)
|
169
|
+
metadata.extend([doc.metadata for _ in range(len(cur_text_chunks))])
|
170
|
+
#if batch size is reached, upload the batch
|
171
|
+
if len(text_chunks) > batch_size:
|
172
|
+
for idx in range(0, len(text_chunks), batch_size):
|
173
|
+
if idx + batch_size > len(text_chunks):
|
174
|
+
continue
|
175
|
+
batch_texts = text_chunks[0:batch_size]
|
176
|
+
batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
|
177
|
+
#metadata
|
178
|
+
batch_metadatas = metadata[0:batch_size]
|
179
|
+
meta_list = []
|
180
|
+
for meta in batch_metadatas:
|
181
|
+
meta_struct = Struct()
|
182
|
+
meta_struct.update(meta)
|
183
|
+
meta_list.append(meta_struct)
|
184
|
+
del batch_metadatas
|
185
|
+
#creating input proto
|
186
|
+
input_batch = [
|
187
|
+
self._app.inputs().get_text_input(
|
188
|
+
input_id=batch_ids[i],
|
189
|
+
raw_text=text,
|
190
|
+
metadata=meta_list[i],
|
191
|
+
) for i, text in enumerate(batch_texts)
|
192
|
+
]
|
193
|
+
#uploading input with metadata
|
194
|
+
self._app.inputs().upload_inputs(inputs=input_batch)
|
195
|
+
#delete uploaded chunks
|
196
|
+
del text_chunks[0:batch_size]
|
197
|
+
del metadata[0:batch_size]
|
198
|
+
|
199
|
+
#uploading the remaining chunks
|
200
|
+
if len(text_chunks) > 0:
|
201
|
+
batch_size = len(text_chunks)
|
202
|
+
batch_ids = [uuid.uuid4().hex for _ in range(batch_size)]
|
203
|
+
#metadata
|
204
|
+
batch_metadatas = metadata[0:batch_size]
|
205
|
+
meta_list = []
|
206
|
+
for meta in batch_metadatas:
|
207
|
+
meta_struct = Struct()
|
208
|
+
meta_struct.update(meta)
|
209
|
+
meta_list.append(meta_struct)
|
210
|
+
del batch_metadatas
|
211
|
+
#creating input proto
|
212
|
+
input_batch = [
|
213
|
+
self._app.inputs().get_text_input(
|
214
|
+
input_id=batch_ids[i],
|
215
|
+
raw_text=text,
|
216
|
+
metadata=meta_list[i],
|
217
|
+
) for i, text in enumerate(text_chunks)
|
218
|
+
]
|
219
|
+
#uploading input with metadata
|
220
|
+
self._app.inputs().upload_inputs(inputs=input_batch)
|
221
|
+
del text_chunks
|
222
|
+
del metadata
|
223
|
+
|
224
|
+
def chat(self, messages: List[dict], client_manage_state: bool = False) -> List[dict]:
|
225
|
+
"""Chat interface in OpenAI API format.
|
226
|
+
|
227
|
+
Args:
|
228
|
+
messages List[dict]: A list of dictionary in the following format:
|
229
|
+
```
|
230
|
+
[
|
231
|
+
{"role": "user", "content": "Hello there."},
|
232
|
+
{"role": "assistant", "content": "Hi, I'm Claude. How can I help you?"},
|
233
|
+
{"role": "user", "content": "Can you explain LLMs in plain English?"},
|
234
|
+
]
|
235
|
+
```
|
236
|
+
client_manage_state (bool): Whether the client will handle chat state management. Default is false.
|
237
|
+
|
238
|
+
This will pass back the workflow state ID for the server to store chat state.
|
239
|
+
"""
|
240
|
+
if client_manage_state:
|
241
|
+
single_prompt = convert_messages_to_str(messages)
|
242
|
+
input_proto = Inputs._get_proto("", "", text_pb=resources_pb2.Text(raw=single_prompt))
|
243
|
+
response = self._prompt_workflow.predict([input_proto])
|
244
|
+
messages.append(format_assistant_message(response.results[0].outputs[-1].data.text.raw))
|
245
|
+
return messages
|
246
|
+
|
247
|
+
# server-side state management
|
248
|
+
message = messages[-1].get("content", "")
|
249
|
+
if len(message) == 0:
|
250
|
+
raise UserError("Empty message supplied.")
|
251
|
+
|
252
|
+
# get chat state id
|
253
|
+
chat_state_id = "init" if self.chat_state_id is None else self.chat_state_id
|
254
|
+
|
255
|
+
# call predict
|
256
|
+
input_proto = Inputs._get_proto("", "", text_pb=resources_pb2.Text(raw=message))
|
257
|
+
response = self._prompt_workflow.predict([input_proto], workflow_state_id=chat_state_id)
|
258
|
+
|
259
|
+
# store chat state id
|
260
|
+
self.chat_state_id = response.workflow_state.id
|
261
|
+
return [format_assistant_message(response.results[0].outputs[-1].data.text.raw)]
|
clarifai/rag/utils.py
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
import io
|
2
|
+
from pathlib import Path
|
3
|
+
from typing import List
|
4
|
+
|
5
|
+
import requests
|
6
|
+
from llama_index import Document, SimpleDirectoryReader, download_loader
|
7
|
+
from llama_index.node_parser.text import SentenceSplitter
|
8
|
+
from pypdf import PdfReader
|
9
|
+
|
10
|
+
|
11
|
+
## TODO: Make this token-aware.
|
12
|
+
def convert_messages_to_str(messages: List[dict]) -> str:
|
13
|
+
"""convert messages in OpenAI API format into a single string.
|
14
|
+
|
15
|
+
Args:
|
16
|
+
messages List[dict]: A list of dictionary in the following format:
|
17
|
+
```
|
18
|
+
[
|
19
|
+
{"role": "user", "content": "Hello there."},
|
20
|
+
{"role": "assistant", "content": "Hi, I'm Claude. How can I help you?"},
|
21
|
+
{"role": "user", "content": "Can you explain LLMs in plain English?"},
|
22
|
+
]
|
23
|
+
```
|
24
|
+
"""
|
25
|
+
final_str = ""
|
26
|
+
for msg in messages:
|
27
|
+
if "role" in msg and "content" in msg:
|
28
|
+
role = msg.get("role", "")
|
29
|
+
content = msg.get("content", "")
|
30
|
+
final_str += f"\n\n{role}: {content}"
|
31
|
+
return final_str
|
32
|
+
|
33
|
+
|
34
|
+
def format_assistant_message(raw_text: str) -> dict:
|
35
|
+
return {"role": "assistant", "content": raw_text}
|
36
|
+
|
37
|
+
|
38
|
+
def load_documents(file_path: str = None, folder_path: str = None,
|
39
|
+
url: str = None) -> List[Document]:
|
40
|
+
"""Loads documents from a local directory or public url or local filename.
|
41
|
+
|
42
|
+
Args:
|
43
|
+
file_path (str): The path to the filename.
|
44
|
+
folder_path (str): The path to the folder.
|
45
|
+
url (str): The url to the file.
|
46
|
+
"""
|
47
|
+
#document loaders for filepath
|
48
|
+
if file_path:
|
49
|
+
if file_path.endswith(".pdf"):
|
50
|
+
PDFReader = download_loader("PDFReader")
|
51
|
+
loader = PDFReader()
|
52
|
+
documents = loader.load_data(file=Path(file_path))
|
53
|
+
elif file_path.endswith(".docx"):
|
54
|
+
docReader = download_loader("DocxReader")
|
55
|
+
loader = docReader()
|
56
|
+
documents = loader.load_data(file=Path(file_path))
|
57
|
+
elif file_path.endswith(".txt"):
|
58
|
+
with open(file_path, 'r') as file:
|
59
|
+
text_content = file.read()
|
60
|
+
documents = [Document(text=text_content)]
|
61
|
+
else:
|
62
|
+
raise ValueError("Only .pdf, .docx, and .txt files are supported.")
|
63
|
+
|
64
|
+
#document loaders for folderpath
|
65
|
+
if folder_path:
|
66
|
+
documents = SimpleDirectoryReader(
|
67
|
+
input_dir=Path(folder_path), required_exts=[".pdf", ".docx"]).load_data()
|
68
|
+
|
69
|
+
#document loaders for url
|
70
|
+
if url:
|
71
|
+
response = requests.get(url)
|
72
|
+
if response.status_code != 200:
|
73
|
+
raise ValueError(f"Invalid url {url}.")
|
74
|
+
#for text files
|
75
|
+
try:
|
76
|
+
documents = [Document(text=response.content)]
|
77
|
+
#for pdf files
|
78
|
+
except Exception:
|
79
|
+
documents = []
|
80
|
+
pdf_file = PdfReader(io.BytesIO(response.content))
|
81
|
+
num_pages = len(pdf_file.pages)
|
82
|
+
for page in range(num_pages):
|
83
|
+
page_text = pdf_file.pages[page].extract_text()
|
84
|
+
documents.append(Document(text=page_text))
|
85
|
+
else:
|
86
|
+
raise ValueError(f"Invalid url {url}.")
|
87
|
+
|
88
|
+
return documents
|
89
|
+
|
90
|
+
|
91
|
+
def split_document(text: str, chunk_size: int, chunk_overlap: int, **kwargs) -> List[str]:
|
92
|
+
"""Splits a document into chunks of text.
|
93
|
+
|
94
|
+
Args:
|
95
|
+
text (str): The text to split.
|
96
|
+
chunk_size (int): The size of each chunk.
|
97
|
+
chunk_overlap (int): The amount of overlap between each chunk.
|
98
|
+
**kwargs: Additional keyword arguments for the SentenceSplitter.
|
99
|
+
"""
|
100
|
+
text_parser = SentenceSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap, **kwargs)
|
101
|
+
text_chunks = text_parser.split_text(text)
|
102
|
+
return text_chunks
|
clarifai/schema/search.py
CHANGED
@@ -20,6 +20,9 @@ def get_schema() -> Schema:
|
|
20
20
|
- 'id': Non-empty string
|
21
21
|
- 'language': Non-empty string
|
22
22
|
- 'value': 0 or 1 integer
|
23
|
+
- 'input_types': List of 'image', 'video', 'text' or 'audio'
|
24
|
+
- 'input_dataset_ids': List of strings
|
25
|
+
- 'input_status_code': Integer
|
23
26
|
|
24
27
|
Returns:
|
25
28
|
Schema: The schema for rank and filter.
|
clarifai/urls/helper.py
CHANGED
@@ -58,6 +58,23 @@ class ClarifaiUrlHelper(object):
|
|
58
58
|
return "%s/%s/%s/%s/%s/versions/%s" % (self.auth.ui, user_id, app_id, resource_type,
|
59
59
|
resource_id, version_id)
|
60
60
|
|
61
|
+
@classmethod
def split_clarifai_app_url(cls, url):
    """Extract the user id and app id from a fully qualified Clarifai app url.

    clarifai.com uses fully qualified urls to resources.
    App urls are in the format of:
    https://clarifai.com/{user_id}/{app_id}/

    The scheme is optional and a trailing slash is accepted.

    Args:
        url (str): The app url, e.g. "https://clarifai.com/user_id/app_id".

    Returns:
        tuple: ``(user_id, app_id)``.

    Raises:
        ValueError: If the url does not have exactly 2 parts after the domain name.
    """
    # Drop the scheme so urlparse keeps the domain as the first path segment.
    url = url.replace("https://", "", 1).replace("http://", "", 1)
    # Strip slashes on BOTH ends: the documented format ends with "/", which
    # would otherwise produce an empty trailing part and be rejected below.
    path = urlparse(url).path.strip("/")
    parts = path.split("/")
    if len(parts) != 3:
        raise ValueError(
            f"Provided url must have 2 parts after the domain name. The current parts are: {parts}")
    # parts[0] is the domain; the remaining two are the user id and app id.
    return tuple(parts[1:])
|
77
|
+
|
61
78
|
@classmethod
|
62
79
|
def split_clarifai_url(cls, url):
|
63
80
|
"""
|
clarifai/versions.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: clarifai
|
3
|
-
Version:
|
3
|
+
Version: 10.0.0
|
4
4
|
Summary: Clarifai Python SDK
|
5
5
|
Home-page: https://github.com/Clarifai/clarifai-python
|
6
6
|
Author: Clarifai
|
@@ -20,16 +20,18 @@ Classifier: Operating System :: OS Independent
|
|
20
20
|
Requires-Python: >=3.8
|
21
21
|
Description-Content-Type: text/markdown
|
22
22
|
License-File: LICENSE
|
23
|
-
Requires-Dist: clarifai-grpc (
|
23
|
+
Requires-Dist: clarifai-grpc (~=10.0.1)
|
24
24
|
Requires-Dist: pandas (>=1.3.5)
|
25
25
|
Requires-Dist: numpy (>=1.22.0)
|
26
26
|
Requires-Dist: tqdm (>=4.65.0)
|
27
|
-
Requires-Dist: opencv-python (
|
28
|
-
Requires-Dist: tritonclient (
|
27
|
+
Requires-Dist: opencv-python (>=4.7.0.68)
|
28
|
+
Requires-Dist: tritonclient (>=2.34.0)
|
29
29
|
Requires-Dist: rich (>=13.4.2)
|
30
30
|
Requires-Dist: PyYAML (>=6.0.1)
|
31
|
-
Requires-Dist: schema (
|
31
|
+
Requires-Dist: schema (>=0.7.5)
|
32
32
|
Requires-Dist: Pillow (>=9.5.0)
|
33
|
+
Requires-Dist: llama-index (>=0.9.27)
|
34
|
+
Requires-Dist: pypdf (>=3.17.4)
|
33
35
|
Provides-Extra: all
|
34
36
|
Requires-Dist: pycocotools (==2.0.6) ; extra == 'all'
|
35
37
|
|
@@ -55,7 +57,7 @@ Clarifai Python SDK</a>
|
|
55
57
|
|
56
58
|
This is the official Python client for interacting with our powerful [API](https://docs.clarifai.com). The Clarifai Python SDK offers a comprehensive set of tools to integrate Clarifai's AI platform to leverage computer vision capabilities like classification, detection, segmentation and natural language capabilities like classification, summarisation, generation, Q&A, etc. into your applications. With just a few lines of code, you can leverage cutting-edge artificial intelligence to unlock valuable insights from visual and textual content.
|
57
59
|
|
58
|
-
[Website](https://www.clarifai.com/) | [Demo](https://clarifai.com/demo) | [Signup for a Free Account](https://clarifai.com/signup) | [API Docs](https://docs.clarifai.com/) | [Clarifai Community](https://clarifai.com/explore) | [Python SDK Docs](https://docs.clarifai.com/python-sdk/api-reference) | [Examples](https://github.com/Clarifai/examples) | [Colab Notebooks](https://github.com/Clarifai/colab-notebooks) | [Discord](https://discord.gg/XAPE3Vtg)
|
60
|
+
[Website](https://www.clarifai.com/) | [Schedule Demo](https://www.clarifai.com/company/schedule-demo) | [Signup for a Free Account](https://clarifai.com/signup) | [API Docs](https://docs.clarifai.com/) | [Clarifai Community](https://clarifai.com/explore) | [Python SDK Docs](https://docs.clarifai.com/python-sdk/api-reference) | [Examples](https://github.com/Clarifai/examples) | [Colab Notebooks](https://github.com/Clarifai/colab-notebooks) | [Discord](https://discord.gg/XAPE3Vtg)
|
59
61
|
|
60
62
|
|
61
63
|
---
|
@@ -212,6 +214,17 @@ annotations_list = list(annotation_generator)
|
|
212
214
|
all_concepts = list(app.list_concepts())
|
213
215
|
```
|
214
216
|
|
217
|
+
#### Input Download
|
218
|
+
```python
|
219
|
+
#listing inputs
|
220
|
+
input_generator = input_obj.list_inputs(page_no=1,per_page=1,input_type='image')
|
221
|
+
inputs_list = list(input_generator)
|
222
|
+
|
223
|
+
#downloading inputs
|
224
|
+
input_bytes = input_obj.download_inputs(inputs_list)
|
225
|
+
with open('demo.jpg','wb') as f:
|
226
|
+
f.write(input_bytes[0])
|
227
|
+
```
|
215
228
|
|
216
229
|
|
217
230
|
## :brain: Interacting with Models
|