chatlas 0.4.0__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of chatlas might be problematic. Click here for more details.

chatlas/__init__.py CHANGED
@@ -1,7 +1,9 @@
1
1
  from . import types
2
2
  from ._anthropic import ChatAnthropic, ChatBedrockAnthropic
3
+ from ._auto import ChatAuto
3
4
  from ._chat import Chat
4
5
  from ._content_image import content_image_file, content_image_plot, content_image_url
6
+ from ._content_pdf import content_pdf_file, content_pdf_url
5
7
  from ._github import ChatGithub
6
8
  from ._google import ChatGoogle, ChatVertex
7
9
  from ._groq import ChatGroq
@@ -10,12 +12,19 @@ from ._ollama import ChatOllama
10
12
  from ._openai import ChatAzureOpenAI, ChatOpenAI
11
13
  from ._perplexity import ChatPerplexity
12
14
  from ._provider import Provider
15
+ from ._snowflake import ChatSnowflake
13
16
  from ._tokens import token_usage
14
17
  from ._tools import Tool
15
18
  from ._turn import Turn
16
19
 
20
+ try:
21
+ from ._version import version as __version__
22
+ except ImportError: # pragma: no cover
23
+ __version__ = "0.0.0" # stub value for docs
24
+
17
25
  __all__ = (
18
26
  "ChatAnthropic",
27
+ "ChatAuto",
19
28
  "ChatBedrockAnthropic",
20
29
  "ChatGithub",
21
30
  "ChatGoogle",
@@ -24,11 +33,14 @@ __all__ = (
24
33
  "ChatOpenAI",
25
34
  "ChatAzureOpenAI",
26
35
  "ChatPerplexity",
36
+ "ChatSnowflake",
27
37
  "ChatVertex",
28
38
  "Chat",
29
39
  "content_image_file",
30
40
  "content_image_plot",
31
41
  "content_image_url",
42
+ "content_pdf_file",
43
+ "content_pdf_url",
32
44
  "interpolate",
33
45
  "interpolate_file",
34
46
  "Provider",
chatlas/_anthropic.py CHANGED
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import base64
3
4
  import json
4
5
  import warnings
5
6
  from typing import TYPE_CHECKING, Any, Literal, Optional, Union, cast, overload
@@ -12,6 +13,7 @@ from ._content import (
12
13
  ContentImageInline,
13
14
  ContentImageRemote,
14
15
  ContentJson,
16
+ ContentPDF,
15
17
  ContentText,
16
18
  ContentToolRequest,
17
19
  ContentToolResult,
@@ -31,6 +33,7 @@ if TYPE_CHECKING:
31
33
  ToolParam,
32
34
  ToolUseBlock,
33
35
  )
36
+ from anthropic.types.document_block_param import DocumentBlockParam
34
37
  from anthropic.types.image_block_param import ImageBlockParam
35
38
  from anthropic.types.model_param import ModelParam
36
39
  from anthropic.types.text_block_param import TextBlockParam
@@ -45,6 +48,7 @@ if TYPE_CHECKING:
45
48
  ImageBlockParam,
46
49
  ToolUseBlockParam,
47
50
  ToolResultBlockParam,
51
+ DocumentBlockParam,
48
52
  ]
49
53
  else:
50
54
  Message = object
@@ -72,7 +76,7 @@ def ChatAnthropic(
72
76
  ::: {.callout-note}
73
77
  ## API key
74
78
 
75
- Note that a Claude Prop membership does not give you the ability to call
79
+ Note that a Claude Pro membership does not give you the ability to call
76
80
  models via the API. You will need to go to the [developer
77
81
  console](https://console.anthropic.com/account/keys) to sign up (and pay
78
82
  for) a developer account that will give you an API key that you can use with
@@ -82,7 +86,7 @@ def ChatAnthropic(
82
86
  ::: {.callout-note}
83
87
  ## Python requirements
84
88
 
85
- `ChatAnthropic` requires the `anthropic` package (e.g., `pip install anthropic`).
89
+ `ChatAnthropic` requires the `anthropic` package: `pip install "chatlas[anthropic]"`.
86
90
  :::
87
91
 
88
92
  Examples
@@ -164,7 +168,7 @@ def ChatAnthropic(
164
168
  """
165
169
 
166
170
  if model is None:
167
- model = log_model_default("claude-3-5-sonnet-latest")
171
+ model = log_model_default("claude-3-7-sonnet-latest")
168
172
 
169
173
  return Chat(
170
174
  provider=AnthropicProvider(
@@ -450,12 +454,21 @@ class AnthropicProvider(Provider[Message, RawMessageStreamEvent, Message]):
450
454
  return {"text": content.text, "type": "text"}
451
455
  elif isinstance(content, ContentJson):
452
456
  return {"text": "<structured data/>", "type": "text"}
457
+ elif isinstance(content, ContentPDF):
458
+ return {
459
+ "type": "document",
460
+ "source": {
461
+ "type": "base64",
462
+ "media_type": "application/pdf",
463
+ "data": base64.b64encode(content.data).decode("utf-8"),
464
+ },
465
+ }
453
466
  elif isinstance(content, ContentImageInline):
454
467
  return {
455
468
  "type": "image",
456
469
  "source": {
457
470
  "type": "base64",
458
- "media_type": content.content_type,
471
+ "media_type": content.image_content_type,
459
472
  "data": content.data or "",
460
473
  },
461
474
  }
@@ -504,7 +517,7 @@ class AnthropicProvider(Provider[Message, RawMessageStreamEvent, Message]):
504
517
  contents = []
505
518
  for content in completion.content:
506
519
  if content.type == "text":
507
- contents.append(ContentText(content.text))
520
+ contents.append(ContentText(text=content.text))
508
521
  elif content.type == "tool_use":
509
522
  if has_data_model and content.name == "_structured_tool_call":
510
523
  if not isinstance(content.input, dict):
@@ -515,11 +528,11 @@ class AnthropicProvider(Provider[Message, RawMessageStreamEvent, Message]):
515
528
  raise ValueError(
516
529
  "Expected data extraction tool to return a 'data' field."
517
530
  )
518
- contents.append(ContentJson(content.input["data"]))
531
+ contents.append(ContentJson(value=content.input["data"]))
519
532
  else:
520
533
  contents.append(
521
534
  ContentToolRequest(
522
- content.id,
535
+ id=content.id,
523
536
  name=content.name,
524
537
  arguments=content.input,
525
538
  )
@@ -572,8 +585,8 @@ def ChatBedrockAnthropic(
572
585
  ::: {.callout-note}
573
586
  ## Python requirements
574
587
 
575
- `ChatBedrockAnthropic`, requires the `anthropic` package with the `bedrock` extras
576
- (e.g., `pip install anthropic[bedrock]`).
588
+ `ChatBedrockAnthropic` requires the `anthropic` package with the `bedrock` extras:
589
+ `pip install "chatlas[bedrock-anthropic]"`
577
590
  :::
578
591
 
579
592
  Examples
chatlas/_auto.py ADDED
@@ -0,0 +1,183 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ from typing import Callable, Literal, Optional
6
+
7
+ from ._anthropic import ChatAnthropic, ChatBedrockAnthropic
8
+ from ._chat import Chat
9
+ from ._github import ChatGithub
10
+ from ._google import ChatGoogle, ChatVertex
11
+ from ._groq import ChatGroq
12
+ from ._ollama import ChatOllama
13
+ from ._openai import ChatAzureOpenAI, ChatOpenAI
14
+ from ._perplexity import ChatPerplexity
15
+ from ._snowflake import ChatSnowflake
16
+ from ._turn import Turn
17
+
18
+ AutoProviders = Literal[
19
+ "anthropic",
20
+ "bedrock-anthropic",
21
+ "github",
22
+ "google",
23
+ "groq",
24
+ "ollama",
25
+ "openai",
26
+ "azure-openai",
27
+ "perplexity",
28
+ "snowflake",
29
+ "vertex",
30
+ ]
31
+
32
+ _provider_chat_model_map: dict[AutoProviders, Callable[..., Chat]] = {
33
+ "anthropic": ChatAnthropic,
34
+ "bedrock-anthropic": ChatBedrockAnthropic,
35
+ "github": ChatGithub,
36
+ "google": ChatGoogle,
37
+ "groq": ChatGroq,
38
+ "ollama": ChatOllama,
39
+ "openai": ChatOpenAI,
40
+ "azure-openai": ChatAzureOpenAI,
41
+ "perplexity": ChatPerplexity,
42
+ "snowflake": ChatSnowflake,
43
+ "vertex": ChatVertex,
44
+ }
45
+
46
+
47
+ def ChatAuto(
48
+ system_prompt: Optional[str] = None,
49
+ turns: Optional[list[Turn]] = None,
50
+ *,
51
+ provider: Optional[AutoProviders] = None,
52
+ model: Optional[str] = None,
53
+ **kwargs,
54
+ ) -> Chat:
55
+ """
56
+ Use environment variables (env vars) to configure the Chat provider and model.
57
+
58
+ Creates a [](`~chatlas.Chat`) instance based on the specified provider.
59
+ The provider may be specified through the `provider` parameter and/or the
60
+ `CHATLAS_CHAT_PROVIDER` env var. If both are set, the env var takes
61
+ precedence. Similarly, the provider's model may be specified through the
62
+ `model` parameter and/or the `CHATLAS_CHAT_MODEL` env var. Also, additional
63
+ configuration may be provided through the `kwargs` parameter and/or the
64
+ `CHATLAS_CHAT_ARGS` env var (as a JSON string). In this case, when both are
65
+ set, they are merged, with the env var arguments taking precedence.
66
+
67
+ As a result, `ChatAuto()` provides a convenient way to set a default
68
+ provider and model in your Python code, while allowing you to override
69
+ these settings through env vars (i.e., without modifying your code).
70
+
71
+ Prerequisites
72
+ -------------
73
+
74
+ ::: {.callout-note}
75
+ ## API key
76
+
77
+ Follow the instructions for the specific provider to obtain an API key.
78
+ :::
79
+
80
+ ::: {.callout-note}
81
+ ## Python requirements
82
+
83
+ Follow the instructions for the specific provider to install the required
84
+ Python packages.
85
+ :::
86
+
87
+
88
+ Examples
89
+ --------
90
+ First, set the environment variables for the provider, arguments, and API key:
91
+
92
+ ```bash
93
+ export CHATLAS_CHAT_PROVIDER=anthropic
94
+ export CHATLAS_CHAT_MODEL=claude-3-haiku-20240307
95
+ export CHATLAS_CHAT_ARGS='{"kwargs": {"max_retries": 3}}'
96
+ export ANTHROPIC_API_KEY=your_api_key
97
+ ```
98
+
99
+ Then, you can use the `ChatAuto` function to create a Chat instance:
100
+
101
+ ```python
102
+ from chatlas import ChatAuto
103
+
104
+ chat = ChatAuto()
105
+ chat.chat("What is the capital of France?")
106
+ ```
107
+
108
+ Parameters
109
+ ----------
110
+ provider
111
+ The name of the default chat provider to use. Providers are strings
112
+ formatted in kebab-case, e.g. to use `ChatBedrockAnthropic` set
113
+ `provider="bedrock-anthropic"`.
114
+
115
+ This value can also be provided via the `CHATLAS_CHAT_PROVIDER`
116
+ environment variable, which takes precedence over `provider`
117
+ when set.
118
+ model
119
+ The name of the default model to use. This value can also be provided
120
+ via the `CHATLAS_CHAT_MODEL` environment variable, which takes
121
+ precedence over `model` when set.
122
+ system_prompt
123
+ A system prompt to set the behavior of the assistant.
124
+ turns
125
+ A list of turns to start the chat with (i.e., continuing a previous
126
+ conversation). If not provided, the conversation begins from scratch. Do
127
+ not provide non-`None` values for both `turns` and `system_prompt`. Each
128
+ message in the list should be a dictionary with at least `role` (usually
129
+ `system`, `user`, or `assistant`, but `tool` is also possible). Normally
130
+ there is also a `content` field, which is a string.
131
+ **kwargs
132
+ Additional keyword arguments to pass to the Chat constructor. See the
133
+ documentation for each provider for more details on the available
134
+ options.
135
+
136
+ These arguments can also be provided via the `CHATLAS_CHAT_ARGS`
137
+ environment variable as a JSON string. When provided, the options
138
+ in the `CHATLAS_CHAT_ARGS` env var take precedence over the options
139
+ passed to `kwargs`.
140
+
141
+ Note that `system_prompt` and `turns` in `kwargs` or in
142
+ `CHATLAS_CHAT_ARGS` are ignored.
143
+
144
+ Returns
145
+ -------
146
+ Chat
147
+ A chat instance using the specified provider.
148
+
149
+ Raises
150
+ ------
151
+ ValueError
152
+ If no valid provider is specified either through parameters or
153
+ environment variables.
154
+ """
155
+ the_provider = os.environ.get("CHATLAS_CHAT_PROVIDER", provider)
156
+
157
+ if the_provider is None:
158
+ raise ValueError(
159
+ "Provider name is required as parameter or `CHATLAS_CHAT_PROVIDER` must be set."
160
+ )
161
+ if the_provider not in _provider_chat_model_map:
162
+ raise ValueError(
163
+ f"Provider name '{the_provider}' is not a known chatlas provider: "
164
+ f"{', '.join(_provider_chat_model_map.keys())}"
165
+ )
166
+
167
+ # `system_prompt` and `turns` always come from `ChatAuto()`
168
+ base_args = {"system_prompt": system_prompt, "turns": turns}
169
+
170
+ if env_model := os.environ.get("CHATLAS_CHAT_MODEL"):
171
+ model = env_model
172
+
173
+ if model:
174
+ base_args["model"] = model
175
+
176
+ env_kwargs = {}
177
+ if env_kwargs_str := os.environ.get("CHATLAS_CHAT_ARGS"):
178
+ env_kwargs = json.loads(env_kwargs_str)
179
+
180
+ kwargs = {**kwargs, **env_kwargs, **base_args}
181
+ kwargs = {k: v for k, v in kwargs.items() if v is not None}
182
+
183
+ return _provider_chat_model_map[the_provider](**kwargs)
chatlas/_chat.py CHANGED
@@ -408,7 +408,9 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
408
408
  Whether to run the app in a background thread. If `None`, the app will
409
409
  run in a background thread if the current environment is a notebook.
410
410
  echo
411
- Whether to echo text content, all content (i.e., tool calls), or no content. Defaults to `"none"` when `stream=True` and `"text"` when `stream=False`.
411
+ Whether to echo text content, all content (i.e., tool calls), or no
412
+ content. Defaults to `"none"` when `stream=True` and `"text"` when
413
+ `stream=False`.
412
414
  kwargs
413
415
  Additional keyword arguments to pass to the method used for requesting
414
416
  the response.
@@ -1246,7 +1248,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
1246
1248
  id_: str,
1247
1249
  ) -> ContentToolResult:
1248
1250
  if func is None:
1249
- return ContentToolResult(id_, value=None, error="Unknown tool")
1251
+ return ContentToolResult(id=id_, value=None, error="Unknown tool")
1250
1252
 
1251
1253
  name = func.__name__
1252
1254
 
@@ -1256,10 +1258,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
1256
1258
  else:
1257
1259
  result = func(arguments)
1258
1260
 
1259
- return ContentToolResult(id_, value=result, error=None, name=name)
1261
+ return ContentToolResult(id=id_, value=result, error=None, name=name)
1260
1262
  except Exception as e:
1261
1263
  log_tool_error(name, str(arguments), e)
1262
- return ContentToolResult(id_, value=None, error=str(e), name=name)
1264
+ return ContentToolResult(id=id_, value=None, error=str(e), name=name)
1263
1265
 
1264
1266
  @staticmethod
1265
1267
  async def _invoke_tool_async(
@@ -1268,7 +1270,7 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
1268
1270
  id_: str,
1269
1271
  ) -> ContentToolResult:
1270
1272
  if func is None:
1271
- return ContentToolResult(id_, value=None, error="Unknown tool")
1273
+ return ContentToolResult(id=id_, value=None, error="Unknown tool")
1272
1274
 
1273
1275
  name = func.__name__
1274
1276
 
@@ -1278,10 +1280,10 @@ class Chat(Generic[SubmitInputArgsT, CompletionT]):
1278
1280
  else:
1279
1281
  result = await func(arguments)
1280
1282
 
1281
- return ContentToolResult(id_, value=result, error=None, name=name)
1283
+ return ContentToolResult(id=id_, value=result, error=None, name=name)
1282
1284
  except Exception as e:
1283
1285
  log_tool_error(func.__name__, str(arguments), e)
1284
- return ContentToolResult(id_, value=None, error=str(e), name=name)
1286
+ return ContentToolResult(id=id_, value=None, error=str(e), name=name)
1285
1287
 
1286
1288
  def _markdown_display(
1287
1289
  self, echo: Literal["text", "all", "none"]
chatlas/_content.py CHANGED
@@ -1,9 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import json
4
- from dataclasses import dataclass
5
4
  from pprint import pformat
6
- from typing import Any, Literal, Optional
5
+ from typing import Any, Literal, Optional, Union
6
+
7
+ from pydantic import BaseModel, ConfigDict
7
8
 
8
9
  ImageContentTypes = Literal[
9
10
  "image/png",
@@ -15,12 +16,28 @@ ImageContentTypes = Literal[
15
16
  Allowable content types for images.
16
17
  """
17
18
 
19
+ ContentTypeEnum = Literal[
20
+ "text",
21
+ "image_remote",
22
+ "image_inline",
23
+ "tool_request",
24
+ "tool_result",
25
+ "json",
26
+ "pdf",
27
+ ]
28
+ """
29
+ A discriminated union of all content types.
30
+ """
31
+
18
32
 
19
- class Content:
33
+ class Content(BaseModel):
20
34
  """
21
35
  Base class for all content types that can appear in a [](`~chatlas.Turn`)
22
36
  """
23
37
 
38
+ model_config = ConfigDict(arbitrary_types_allowed=True)
39
+ content_type: ContentTypeEnum
40
+
24
41
  def __str__(self):
25
42
  raise NotImplementedError
26
43
 
@@ -31,13 +48,13 @@ class Content:
31
48
  raise NotImplementedError
32
49
 
33
50
 
34
- @dataclass
35
51
  class ContentText(Content):
36
52
  """
37
53
  Text content for a [](`~chatlas.Turn`)
38
54
  """
39
55
 
40
56
  text: str
57
+ content_type: ContentTypeEnum = "text"
41
58
 
42
59
  def __str__(self):
43
60
  return self.text
@@ -62,7 +79,6 @@ class ContentImage(Content):
62
79
  pass
63
80
 
64
81
 
65
- @dataclass
66
82
  class ContentImageRemote(ContentImage):
67
83
  """
68
84
  Image content from a URL.
@@ -81,6 +97,8 @@ class ContentImageRemote(ContentImage):
81
97
  url: str
82
98
  detail: Literal["auto", "low", "high"] = "auto"
83
99
 
100
+ content_type: ContentTypeEnum = "image_remote"
101
+
84
102
  def __str__(self):
85
103
  return f"![]({self.url})"
86
104
 
@@ -94,7 +112,6 @@ class ContentImageRemote(ContentImage):
94
112
  )
95
113
 
96
114
 
97
- @dataclass
98
115
  class ContentImageInline(ContentImage):
99
116
  """
100
117
  Inline image content.
@@ -105,17 +122,19 @@ class ContentImageInline(ContentImage):
105
122
 
106
123
  Parameters
107
124
  ----------
108
- content_type
125
+ image_content_type
109
126
  The content type of the image.
110
127
  data
111
128
  The base64-encoded image data.
112
129
  """
113
130
 
114
- content_type: ImageContentTypes
131
+ image_content_type: ImageContentTypes
115
132
  data: Optional[str] = None
116
133
 
134
+ content_type: ContentTypeEnum = "image_inline"
135
+
117
136
  def __str__(self):
118
- return f"![](data:{self.content_type};base64,{self.data})"
137
+ return f"![](data:{self.image_content_type};base64,{self.data})"
119
138
 
120
139
  def _repr_markdown_(self):
121
140
  return self.__str__()
@@ -124,11 +143,10 @@ class ContentImageInline(ContentImage):
124
143
  n_bytes = len(self.data) if self.data else 0
125
144
  return (
126
145
  " " * indent
127
- + f"<ContentImageInline content_type='{self.content_type}' size={n_bytes}>"
146
+ + f"<ContentImageInline content_type='{self.image_content_type}' size={n_bytes}>"
128
147
  )
129
148
 
130
149
 
131
- @dataclass
132
150
  class ContentToolRequest(Content):
133
151
  """
134
152
  A request to call a tool/function
@@ -151,6 +169,8 @@ class ContentToolRequest(Content):
151
169
  name: str
152
170
  arguments: object
153
171
 
172
+ content_type: ContentTypeEnum = "tool_request"
173
+
154
174
  def __str__(self):
155
175
  args_str = self._arguments_str()
156
176
  func_call = f"{self.name}({args_str})"
@@ -173,7 +193,6 @@ class ContentToolRequest(Content):
173
193
  return str(self.arguments)
174
194
 
175
195
 
176
- @dataclass
177
196
  class ContentToolResult(Content):
178
197
  """
179
198
  The result of calling a tool/function
@@ -199,21 +218,26 @@ class ContentToolResult(Content):
199
218
  name: Optional[str] = None
200
219
  error: Optional[str] = None
201
220
 
202
- def _get_value_and_language(self) -> tuple[str, str]:
221
+ content_type: ContentTypeEnum = "tool_result"
222
+
223
+ def _get_value(self, pretty: bool = False) -> str:
203
224
  if self.error:
204
- return f"Tool calling failed with error: '{self.error}'", ""
225
+ return f"Tool calling failed with error: '{self.error}'"
226
+ if not pretty:
227
+ return str(self.value)
205
228
  try:
206
- json_val = json.loads(self.value)
207
- return pformat(json_val, indent=2, sort_dicts=False), "python"
208
- except: # noqa: E722
209
- return str(self.value), ""
229
+ json_val = json.loads(self.value) # type: ignore
230
+ return pformat(json_val, indent=2, sort_dicts=False)
231
+ except: # noqa
232
+ return str(self.value)
210
233
 
234
+ # Primarily used for `echo="all"`...
211
235
  def __str__(self):
212
236
  comment = f"# tool result ({self.id})"
213
- value, language = self._get_value_and_language()
214
-
215
- return f"""```{language}\n{comment}\n{value}\n```"""
237
+ value = self._get_value(pretty=True)
238
+ return f"""```python\n{comment}\n{value}\n```"""
216
239
 
240
+ # ... and for displaying in the notebook
217
241
  def _repr_markdown_(self):
218
242
  return self.__str__()
219
243
 
@@ -224,12 +248,11 @@ class ContentToolResult(Content):
224
248
  res += f" error='{self.error}'"
225
249
  return res + ">"
226
250
 
251
+ # The actual value to send to the model
227
252
  def get_final_value(self) -> str:
228
- value, _language = self._get_value_and_language()
229
- return value
253
+ return self._get_value()
230
254
 
231
255
 
232
- @dataclass
233
256
  class ContentJson(Content):
234
257
  """
235
258
  JSON content
@@ -245,6 +268,8 @@ class ContentJson(Content):
245
268
 
246
269
  value: dict[str, Any]
247
270
 
271
+ content_type: ContentTypeEnum = "json"
272
+
248
273
  def __str__(self):
249
274
  return json.dumps(self.value, indent=2)
250
275
 
@@ -253,3 +278,70 @@ class ContentJson(Content):
253
278
 
254
279
  def __repr__(self, indent: int = 0):
255
280
  return " " * indent + f"<ContentJson value={self.value}>"
281
+
282
+
283
+ class ContentPDF(Content):
284
+ """
285
+ PDF content
286
+
287
+ This content type carries a PDF document as raw bytes so that it can be
288
+ sent to the model as input (e.g., as a base64-encoded document block).
289
+
290
+ Parameters
291
+ ----------
292
+ data
293
+ The raw bytes of the PDF document.
294
+ """
295
+
296
+ data: bytes
297
+
298
+ content_type: ContentTypeEnum = "pdf"
299
+
300
+ def __str__(self):
301
+ return "<PDF document>"
302
+
303
+ def _repr_markdown_(self):
304
+ return self.__str__()
305
+
306
+ def __repr__(self, indent: int = 0):
307
+ return " " * indent + f"<ContentPDF size={len(self.data)}>"
308
+
309
+
310
+ ContentUnion = Union[
311
+ ContentText,
312
+ ContentImageRemote,
313
+ ContentImageInline,
314
+ ContentToolRequest,
315
+ ContentToolResult,
316
+ ContentJson,
317
+ ContentPDF,
318
+ ]
319
+
320
+
321
+ def create_content(data: dict[str, Any]) -> ContentUnion:
322
+ """
323
+ Factory function to create the appropriate Content subclass based on the data.
324
+
325
+ This is useful when deserializing content from JSON.
326
+ """
327
+ if not isinstance(data, dict):
328
+ raise ValueError("Content data must be a dictionary")
329
+
330
+ ct = data.get("content_type")
331
+
332
+ if ct == "text":
333
+ return ContentText.model_validate(data)
334
+ elif ct == "image_remote":
335
+ return ContentImageRemote.model_validate(data)
336
+ elif ct == "image_inline":
337
+ return ContentImageInline.model_validate(data)
338
+ elif ct == "tool_request":
339
+ return ContentToolRequest.model_validate(data)
340
+ elif ct == "tool_result":
341
+ return ContentToolResult.model_validate(data)
342
+ elif ct == "json":
343
+ return ContentJson.model_validate(data)
344
+ elif ct == "pdf":
345
+ return ContentPDF.model_validate(data)
346
+ else:
347
+ raise ValueError(f"Unknown content type: {ct}")
chatlas/_content_image.py CHANGED
@@ -8,6 +8,7 @@ import warnings
8
8
  from typing import Literal, Union, cast
9
9
 
10
10
  from ._content import ContentImageInline, ContentImageRemote, ImageContentTypes
11
+ from ._content_pdf import parse_data_url
11
12
  from ._utils import MISSING, MISSING_TYPE
12
13
 
13
14
  __all__ = (
@@ -60,15 +61,11 @@ def content_image_url(
60
61
  raise ValueError("detail must be 'auto', 'low', or 'high'")
61
62
 
62
63
  if url.startswith("data:"):
63
- parts = url[5:].split(";", 1)
64
- if len(parts) != 2 or not parts[1].startswith("base64,"):
65
- raise ValueError("url is not a valid data URL.")
66
- content_type = parts[0]
67
- base64_data = parts[1][7:]
64
+ content_type, base64_data = parse_data_url(url)
68
65
  if content_type not in ["image/png", "image/jpeg", "image/webp", "image/gif"]:
69
66
  raise ValueError(f"Unsupported image content type: {content_type}")
70
67
  content_type = cast(ImageContentTypes, content_type)
71
- return ContentImageInline(content_type, base64_data)
68
+ return ContentImageInline(image_content_type=content_type, data=base64_data)
72
69
  else:
73
70
  return ContentImageRemote(url=url, detail=detail)
74
71
 
@@ -191,7 +188,7 @@ def content_image_file(
191
188
  img.save(buffer, format=img.format)
192
189
  base64_data = base64.b64encode(buffer.getvalue()).decode("utf-8")
193
190
 
194
- return ContentImageInline(content_type, base64_data)
191
+ return ContentImageInline(image_content_type=content_type, data=base64_data)
195
192
 
196
193
 
197
194
  def content_image_plot(
@@ -263,7 +260,7 @@ def content_image_plot(
263
260
  fig.savefig(buf, format="png", dpi=dpi, bbox_inches="tight")
264
261
  buf.seek(0)
265
262
  base64_data = base64.b64encode(buf.getvalue()).decode("utf-8")
266
- return ContentImageInline("image/png", base64_data)
263
+ return ContentImageInline(image_content_type="image/png", data=base64_data)
267
264
  finally:
268
265
  fig.set_size_inches(*size)
269
266