granite-common 0.1.dev19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- granite_common/.gitignore +3 -0
- granite_common/__init__.py +41 -0
- granite_common/_version.py +21 -0
- granite_common/base/__init__.py +6 -0
- granite_common/base/io.py +66 -0
- granite_common/base/types.py +217 -0
- granite_common/granite3/__init__.py +10 -0
- granite_common/granite3/constants.py +25 -0
- granite_common/granite3/granite32/__init__.py +16 -0
- granite_common/granite3/granite32/constants.py +90 -0
- granite_common/granite3/granite32/input.py +300 -0
- granite_common/granite3/granite32/output.py +681 -0
- granite_common/granite3/granite32/types.py +41 -0
- granite_common/granite3/granite33/__init__.py +16 -0
- granite_common/granite3/granite33/constants.py +94 -0
- granite_common/granite3/granite33/input.py +209 -0
- granite_common/granite3/granite33/output.py +633 -0
- granite_common/granite3/granite33/types.py +39 -0
- granite_common/granite3/input.py +116 -0
- granite_common/granite3/io.py +0 -0
- granite_common/granite3/output.py +251 -0
- granite_common/granite3/types.py +160 -0
- granite_common/util.py +68 -0
- granite_common-0.1.dev19.dist-info/METADATA +94 -0
- granite_common-0.1.dev19.dist-info/RECORD +28 -0
- granite_common-0.1.dev19.dist-info/WHEEL +5 -0
- granite_common-0.1.dev19.dist-info/licenses/LICENSE +201 -0
- granite_common-0.1.dev19.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
__doc__ = f"""
|
|
4
|
+
{__package__} is a Python library that provides enhanced prompt creation and output
|
|
5
|
+
parsing for IBM Granite models
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Local
|
|
9
|
+
# This file explicitly imports all the symbols that we export at the top level of this
|
|
10
|
+
# package's namespace.
|
|
11
|
+
from .base.types import (
|
|
12
|
+
AssistantMessage,
|
|
13
|
+
ChatCompletion,
|
|
14
|
+
UserMessage,
|
|
15
|
+
)
|
|
16
|
+
from .granite3.granite32 import (
|
|
17
|
+
Granite3Point2ChatCompletion,
|
|
18
|
+
Granite3Point2InputProcessor,
|
|
19
|
+
Granite3Point2OutputProcessor,
|
|
20
|
+
)
|
|
21
|
+
from .granite3.granite33 import (
|
|
22
|
+
Granite3Point3ChatCompletion,
|
|
23
|
+
Granite3Point3InputProcessor,
|
|
24
|
+
Granite3Point3OutputProcessor,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# The contents of __all__ must be strings
|
|
28
|
+
# ``__all__`` has to be a real sequence of strings: ``from granite_common import *``
# indexes into it, and a bare generator expression is neither indexable nor
# re-iterable. Materialize the names into a tuple once, at import time.
__all__ = tuple(
    obj.__name__
    for obj in (
        AssistantMessage,
        ChatCompletion,
        UserMessage,
        Granite3Point2InputProcessor,
        Granite3Point2OutputProcessor,
        Granite3Point2ChatCompletion,
        Granite3Point3ChatCompletion,
        Granite3Point3InputProcessor,
        Granite3Point3OutputProcessor,
    )
)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = ["__version__", "__version_tuple__", "version", "version_tuple"]
|
|
5
|
+
|
|
6
|
+
TYPE_CHECKING = False
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from typing import Tuple
|
|
9
|
+
from typing import Union
|
|
10
|
+
|
|
11
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
12
|
+
else:
|
|
13
|
+
VERSION_TUPLE = object
|
|
14
|
+
|
|
15
|
+
version: str
|
|
16
|
+
__version__: str
|
|
17
|
+
__version_tuple__: VERSION_TUPLE
|
|
18
|
+
version_tuple: VERSION_TUPLE
|
|
19
|
+
|
|
20
|
+
__version__ = version = '0.1.dev19'
|
|
21
|
+
__version_tuple__ = version_tuple = (0, 1, 'dev19')
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
__doc__ = """
|
|
4
|
+
Classes and functions that implement common aspects of input and output string
|
|
5
|
+
processing for all Granite models.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# Standard
|
|
9
|
+
import abc
|
|
10
|
+
|
|
11
|
+
# Local
|
|
12
|
+
from .types import AssistantMessage, ChatCompletion
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class InputProcessor(abc.ABC):
    """
    Interface for generic input processors. An input processor exposes an
    API to transform a chat completion request into a string prompt.
    """

    @abc.abstractmethod
    def transform(
        self, chat_completion: ChatCompletion, add_generation_prompt: bool = True
    ) -> str:
        """
        Convert the structured representation of the inputs to a completion request into
        the string representation of the tokens that should be sent to the model to
        implement said request.

        :param chat_completion: Structured representation of the inputs
        :param add_generation_prompt: If ``True``, the returned prompt string will
            contain a prefix of the next assistant response for use as a prompt to a
            generation request. Otherwise, the prompt will only contain the messages and
            documents in ``chat_completion``.

        :returns: String that can be passed to the model's tokenizer to create a prompt
            for generation.
        """
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class OutputProcessor(abc.ABC):
    """
    Base class for generic output processors. An output processor exposes an
    API to transform model output into a structured representation of the
    information.

    This interface is very generic; see individual classes for more specific
    arguments.
    """

    @abc.abstractmethod
    def transform(
        self, model_output: str, chat_completion: ChatCompletion | None = None
    ) -> AssistantMessage:
        """
        Convert the generated model output into a structured representation of the
        information.

        :param model_output: String output of a generation request, potentially
            incomplete if it was a streaming request
        :param chat_completion: The chat completion request that produced
            ``model_output``. Parameters of the request can determine how the output
            should be decoded.

        :returns: The parsed output so far, as an instance of :class:`AssistantMessage`
            possibly with model-specific extension fields.
        """
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Common shared types
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Standard
|
|
8
|
+
from typing import Literal, TypeAlias
|
|
9
|
+
|
|
10
|
+
# Third Party
|
|
11
|
+
from typing_extensions import Any
|
|
12
|
+
import pydantic
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class NoDefaultsMixin:
    """
    Mixin so that we don't need to copy and paste the code to avoid filling JSON values
    with a full catalog of the default values of rarely-used fields.
    """

    @pydantic.model_serializer(mode="wrap")
    def _workaround_for_design_flaw_in_pydantic(self, nxt):
        """
        Wrap-mode serializer that leaves fields nobody set out of the serialized output.

        Pydantic has no per-model switch for omitting defaults (see
        https://github.com/pydantic/pydantic/issues/4554), so this hook filters the
        output of the wrapped serializer instead. The decorator is documented at
        https://docs.pydantic.dev/latest/api/functional_serializers/

        A field is retained when it is in ``model_fields_set``, is a key of
        ``model_extra``, or is named by :meth:`_keep_these_fields`.

        :param nxt: Handler supplied by Pydantic; calling it with ``self`` yields the
            value that would have been serialized without this mixin.

        :returns: Dictionary holding only the retained fields, in the order in which
            the wrapped serializer emitted them.
        """
        # Start with the value that self.model_dump() would return without this mixin.
        # Otherwise serialization of sub-records will be inconsistent.
        serialized_value = nxt(self)

        # Fields that were set in __init__() or by the JSON parser.
        # Take a copy: extending the object returned by model_fields_set in place
        # would make serialization alter the model's own record of which fields
        # were set. NOTE(review): Pydantic v2 appears to return its live internal
        # set from this property -- confirm against the pinned Pydantic version.
        fields_to_retain_set = set(self.model_fields_set)

        # Extra fields (set during validation or via setattr()) live in
        # self.model_extra, a dictionary that may also be None.
        if self.model_extra is not None:
            fields_to_retain_set.update(self.model_extra)

        # Subclass hook for additional fields that fall through the cracks.
        fields_to_retain_set.update(self._keep_these_fields())

        # Only keys already present in serialized_value are emitted. This preserves
        # Pydantic's field order (downstream code diffs the JSON strings) and never
        # re-adds anything that the wrapped serializer left out.
        return {
            name: value
            for name, value in serialized_value.items()
            if name in fields_to_retain_set
        }

    def _keep_these_fields(self) -> tuple[str, ...]:
        """
        Dataclasses that include this mixin can override this method to add specific
        default values to serialized JSON.

        This is necessary for round-tripping to JSON when there are fields that
        determine which dataclass to use for deserialization.

        :returns: Names of fields to serialize even when they were never explicitly
            set. The base implementation returns an empty tuple.
        """
        return ()
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class _ChatMessageBase(pydantic.BaseModel, NoDefaultsMixin):
    """Base class for all message types.

    Due to the vagaries of Pydantic's JSON parser, we use this class only for common
    functionality, and NOT for defining a common dataclass base type. Use the
    :class:`ChatMessage` type alias to annotate a field or argument as accepting all
    subclasses of this one."""

    content: str
    """Every message has raw string content, even if it also contains parsed structured
    content such as a JSON record."""

    def _keep_these_fields(self) -> tuple[str, ...]:
        # "role" is declared with a default by each concrete message class, so it
        # would normally be dropped from the output as an unset field; naming it
        # here keeps it in the serialized form.
        always_serialized = ("role",)
        return always_serialized
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class UserMessage(_ChatMessageBase):
    """User message for an IBM Granite model chat completion request."""

    # Constant tag for this message type; the only accepted value is "user".
    role: Literal["user"] = "user"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class ToolCall(pydantic.BaseModel, NoDefaultsMixin):
    """Format of an entry in the ``tool_calls`` list of an assistant message"""

    # Optional identifier of this call; absent by default.
    id: str | None = None
    # Name of the tool being invoked.
    name: str

    # This field should adhere to the argument schema from the associated
    # FunctionDefinition in the generation request that produced it.
    # It has no default, so it must always be supplied, but ``None`` is accepted.
    arguments: dict[str, Any] | None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class AssistantMessage(_ChatMessageBase):
    """
    Lowest-common-denominator assistant message for an IBM Granite model chat
    completion request.
    """

    # Constant tag for this message type; the only accepted value is "assistant".
    role: Literal["assistant"] = "assistant"
    # Tool invocations carried by this message, if any; ``None`` by default.
    tool_calls: list[ToolCall] | None = None
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class ToolResultMessage(_ChatMessageBase):
    """
    Message containing the result of a tool call in an IBM Granite model chat completion
    request.
    """

    # Constant tag for this message type; the only accepted value is "tool".
    role: Literal["tool"] = "tool"
    # Required. Presumably the ``id`` of the ToolCall that this result answers --
    # TODO confirm against the code that produces these messages.
    tool_call_id: str
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class SystemMessage(_ChatMessageBase):
    """System message for an IBM Granite model chat completion request."""

    # Constant tag for this message type; the only accepted value is "system".
    role: Literal["system"] = "system"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
ChatMessage: TypeAlias = (
|
|
146
|
+
UserMessage | AssistantMessage | ToolResultMessage | SystemMessage
|
|
147
|
+
)
|
|
148
|
+
"""Type alias for all message types. We use this Union instead of the actual base class
|
|
149
|
+
:class:`_ChatMessageBase` so that Pydantic can parse the message list from JSON."""
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class ToolDefinition(pydantic.BaseModel, NoDefaultsMixin):
    """
    An entry in the ``tools`` list in an IBM Granite model chat completion request.
    """

    # Name of the tool (required).
    name: str
    # Optional free-text description of the tool.
    description: str | None = None

    # This field holds a JSON schema for a record, but the `jsonschema` package doesn't
    # define an object type for such a schema, instead using a dictionary.
    parameters: dict[str, Any] | None = None
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class Document(pydantic.BaseModel, NoDefaultsMixin):
    """RAG documents, which in practice are usually snippets drawn from larger
    documents."""

    # Content of the document or snippet (required).
    text: str
    # Optional identifier for the document; ``None`` when not provided.
    doc_id: str | None = None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class ChatTemplateKwargs(pydantic.BaseModel, NoDefaultsMixin):
    """
    Values that can appear in the ``chat_template_kwargs`` portion of a valid chat
    completion request for a Granite model.
    """

    # Documents supplied with the request, if any; ``None`` by default.
    documents: list[Document] | None = None
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class ChatCompletion(pydantic.BaseModel, NoDefaultsMixin):
    """
    Lowest-common-denominator inputs to a chat completion request for an IBM Granite
    model.

    The schema of this object mirrors that of a chat completion request in vLLM's
    OpenAI-compatible inference API.
    """

    # Conversation so far; each entry is one of the concrete message classes.
    messages: list[ChatMessage]
    # Optional model name.
    model: str | None = None
    # Optional list of tool definitions.
    tools: list[ToolDefinition] | None = None

    chat_template_kwargs: ChatTemplateKwargs | None = pydantic.Field(
        default=None,
        description=(
            "Additional kwargs to pass to the template renderer. "
            "Will be accessible by the chat template. "
            "Restricted to fields that at least one Granite model "
            "supports."
        ),
    )

    model_config = pydantic.ConfigDict(
        # Pass through arbitrary additional keyword arguments for handling by
        # model-specific I/O processors.
        arbitrary_types_allowed=True,
        extra="allow",
    )

    def __getattr__(self, name: str) -> Any:
        """
        Allow attribute access for unknown attributes.

        Python calls this hook only after normal attribute lookup has failed. The
        lookup is first delegated to the parent implementation; if that raises
        :class:`AttributeError`, ``None`` is returned instead of propagating the
        error.

        Because this method never raises, ``hasattr(obj, name)`` is ``True`` for
        every name and ``getattr(obj, name, default)`` never returns ``default``.

        :param name: Name of the attribute that normal lookup could not find.

        :returns: The value resolved by the parent class, or ``None`` if the parent
            class does not know the attribute either.
        """
        try:
            return super().__getattr__(name)
        except AttributeError:
            return None
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
# Standard
|
|
4
|
+
|
|
5
|
+
__doc__ = """
|
|
6
|
+
Constants used in code that is specific to the Granite 3 family of models, but not
|
|
7
|
+
specific to a particular point release.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# String that a Granite 3.x model must receive immediately after _SYSTEM_MESSAGE_START
|
|
12
|
+
# if there are documents in the current request but there are no tools in the current
|
|
13
|
+
# request.
|
|
14
|
+
NO_TOOLS_AND_DOCS_SYSTEM_MESSAGE_PART = """\
|
|
15
|
+
Write the response to the user's input by strictly aligning with the facts in the \
|
|
16
|
+
provided documents. If the information needed to answer the question is not available \
|
|
17
|
+
in the documents, inform the user that the question cannot be answered based on the \
|
|
18
|
+
available data."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# String that a Granite 3.x model must receive immediately after _SYSTEM_MESSAGE_START
|
|
22
|
+
# if there are no tools or documents in the current request and the "thinking" flag is
|
|
23
|
+
# set to `False`.
|
|
24
|
+
NO_TOOLS_NO_DOCS_NO_THINKING_SYSTEM_MESSAGE_PART = """\
|
|
25
|
+
You are a helpful AI assistant."""
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
__doc__ = """
|
|
4
|
+
Input and output processing for the Granite 3.2 family of models.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
# Local
|
|
8
|
+
from .input import Granite3Point2InputProcessor
|
|
9
|
+
from .output import Granite3Point2OutputProcessor
|
|
10
|
+
from .types import Granite3Point2ChatCompletion
|
|
11
|
+
|
|
12
|
+
__all__ = (
|
|
13
|
+
"Granite3Point2ChatCompletion",
|
|
14
|
+
"Granite3Point2InputProcessor",
|
|
15
|
+
"Granite3Point2OutputProcessor",
|
|
16
|
+
)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
|
|
3
|
+
__doc__ = """
|
|
4
|
+
Constants used in code that is specific to the Granite 3.2 family of models.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Delimiters for chain of thought output of Granite 3.2
|
|
9
|
+
COT_START = "Here is my thought process:"
|
|
10
|
+
COT_END = "Here is my response:"
|
|
11
|
+
|
|
12
|
+
# Some versions of the model are known to shorten "Here is" to "Here's", so we
|
|
13
|
+
# provide alternate forms of these strings for those versions.
|
|
14
|
+
COT_START_ALTERNATIVES = [
|
|
15
|
+
COT_START,
|
|
16
|
+
"Here's my thought process:",
|
|
17
|
+
]
|
|
18
|
+
COT_END_ALTERNATIVES = [
|
|
19
|
+
COT_END,
|
|
20
|
+
"Here's my response:",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
# Delimiters for hallucination and citation output of Granite 3.2
|
|
24
|
+
CITATION_START = "# Citations:"
|
|
25
|
+
HALLUCINATION_START = "# Hallucinations:"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
# String that a Granite 3.2 model must receive immediately after _SYSTEM_MESSAGE_START
|
|
29
|
+
# if there are both tools and RAG documents in the current request.
|
|
30
|
+
TOOLS_AND_DOCS_SYSTEM_MESSAGE_PART = """\
|
|
31
|
+
You are a helpful AI assistant with access to the following tools. When a tool is \
|
|
32
|
+
required to answer the user's query, respond with <|tool_call|> followed by a JSON \
|
|
33
|
+
list of tools used. If a tool does not exist in the provided list of tools, notify the \
|
|
34
|
+
user that you do not have the ability to fulfill the request.
|
|
35
|
+
|
|
36
|
+
Write the response to the user's input by strictly aligning with the facts in the \
|
|
37
|
+
provided documents. If the information needed to answer the question is not available \
|
|
38
|
+
in the documents, inform the user that the question cannot be answered based on the \
|
|
39
|
+
available data."""
|
|
40
|
+
|
|
41
|
+
# String that a Granite 3.2 model must receive immediately after _SYSTEM_MESSAGE_START
|
|
42
|
+
# if there are no tools or documents in the current request and the "thinking" flag is
|
|
43
|
+
# set to `True`.
|
|
44
|
+
NO_TOOLS_AND_NO_DOCS_AND_THINKING_SYSTEM_MESSAGE_PART = f"""\
|
|
45
|
+
You are a helpful AI assistant.
|
|
46
|
+
Respond to every user query in a comprehensive and detailed way. You can write down \
|
|
47
|
+
your thoughts and reasoning process before responding. In the thought process, engage \
|
|
48
|
+
in a comprehensive cycle of analysis, summarization, exploration, reassessment, \
|
|
49
|
+
reflection, backtracing, and iteration to develop well-considered thinking process. \
|
|
50
|
+
In the response section, based on various attempts, explorations, and reflections from \
|
|
51
|
+
the thoughts section, systematically present the final solution that you deem correct. \
|
|
52
|
+
The response should summarize the thought process. Write your thoughts after '\
|
|
53
|
+
{COT_START}' and write your response after '{COT_END}' \
|
|
54
|
+
for each user query."""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# String that a Granite 3.2 model must receive immediately after either
|
|
58
|
+
# TOOLS_AND_DOCS_SYSTEM_MESSAGE_PART (if there are tools) or
|
|
59
|
+
# NO_TOOLS_AND_DOCS_SYSTEM_MESSAGE_PART (if there are no tools) in the system prompt
|
|
60
|
+
# if the "citations" flag is `True` and there are documents.
|
|
61
|
+
DOCS_AND_CITATIONS_SYSTEM_MESSAGE_PART = """\
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
In your response, use the symbols <co> and </co> to indicate when a fact comes from a \
|
|
65
|
+
document in the search result, e.g <co>0</co> for a fact from document 0. Afterwards, \
|
|
66
|
+
list all the citations with their corresponding documents in an ordered list."""
|
|
67
|
+
|
|
68
|
+
# String that a Granite 3.2 model must receive immediately after either
|
|
69
|
+
# TOOLS_AND_DOCS_SYSTEM_MESSAGE_PART (if there are tools and no citations) or
|
|
70
|
+
# NO_TOOLS_AND_DOCS_SYSTEM_MESSAGE_PART (if there are no tools or citations) or
|
|
71
|
+
# DOCS_AND_CITATIONS_SYSTEM_MESSAGE_PART in the system prompt
|
|
72
|
+
# if the "hallucinations" flag is `True` and there are documents.
|
|
73
|
+
# Note that a list of zero documents counts as "having documents".
|
|
74
|
+
DOCS_AND_HALLUCINATIONS_SYSTEM_MESSAGE_PART = """\
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
Finally, after the response is written, include a numbered list of sentences from the \
|
|
78
|
+
response that are potentially hallucinated and not based in the documents."""
|
|
79
|
+
|
|
80
|
+
# String that a Granite 3.2 model must receive immediately after _SYSTEM_MESSAGE_START
|
|
81
|
+
# if there are tools in the current request but there are no documents in the current
|
|
82
|
+
# request.
|
|
83
|
+
TOOLS_AND_NO_DOCS_SYSTEM_MESSAGE_PART = """\
|
|
84
|
+
You are a helpful AI assistant with access to the following tools. When a tool is \
|
|
85
|
+
required to answer the user's query, respond with <|tool_call|> followed by a JSON \
|
|
86
|
+
list of tools used. If a tool does not exist in the provided list of tools, notify the \
|
|
87
|
+
user that you do not have the ability to fulfill the request."""
|
|
88
|
+
|
|
89
|
+
MODEL_NAME = "Granite 3.2"
|
|
90
|
+
MODEL_HF_PATH_2B = "ibm-granite/granite-3.2-2b-instruct"
|