chatterer 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ import re
1
2
  from typing import (
2
3
  TYPE_CHECKING,
3
4
  Any,
@@ -11,7 +12,6 @@ from typing import (
11
12
  Type,
12
13
  TypeAlias,
13
14
  TypeVar,
14
- cast,
15
15
  overload,
16
16
  )
17
17
 
@@ -19,10 +19,11 @@ from langchain_core.language_models.base import LanguageModelInput
19
19
  from langchain_core.language_models.chat_models import BaseChatModel
20
20
  from langchain_core.runnables.base import Runnable
21
21
  from langchain_core.runnables.config import RunnableConfig
22
+ from langchain_core.utils.utils import secret_from_env
22
23
  from pydantic import BaseModel, Field
23
24
 
24
- from .messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
25
- from .utils.code_agent import CodeExecutionResult, FunctionSignature, get_default_repl_tool
25
+ from .messages import AIMessage, BaseMessage, HumanMessage, UsageMetadata
26
+ from .utils.code_agent import CodeExecutionResult, FunctionSignature, augment_prompt_for_toolcall
26
27
 
27
28
  if TYPE_CHECKING:
28
29
  from instructor import Partial
@@ -53,6 +54,8 @@ DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT = (
53
54
  )
54
55
  DEFAULT_FUNCTION_REFERENCE_SEPARATOR = "\n---\n" # Separator to distinguish different function references
55
56
 
57
+ PYTHON_CODE_PATTERN: re.Pattern[str] = re.compile(r"```(?:python\s*\n)?(.*?)```", re.DOTALL)
58
+
56
59
 
57
60
  class Chatterer(BaseModel):
58
61
  """Language model for generating text from a given input."""
@@ -60,37 +63,25 @@ class Chatterer(BaseModel):
60
63
  client: BaseChatModel
61
64
  structured_output_kwargs: dict[str, Any] = Field(default_factory=dict)
62
65
 
63
- @overload
64
- def __call__(
65
- self,
66
- messages: LanguageModelInput,
67
- response_model: Type[PydanticModelT],
68
- config: Optional[RunnableConfig] = None,
69
- stop: Optional[list[str]] = None,
70
- **kwargs: Any,
71
- ) -> PydanticModelT: ...
72
-
73
- @overload
74
- def __call__(
75
- self,
76
- messages: LanguageModelInput,
77
- response_model: None = None,
78
- config: Optional[RunnableConfig] = None,
79
- stop: Optional[list[str]] = None,
80
- **kwargs: Any,
81
- ) -> str: ...
82
-
83
- def __call__(
84
- self,
85
- messages: LanguageModelInput,
86
- response_model: Optional[Type[PydanticModelT]] = None,
87
- config: Optional[RunnableConfig] = None,
88
- stop: Optional[list[str]] = None,
89
- **kwargs: Any,
90
- ) -> str | PydanticModelT:
91
- if response_model:
92
- return self.generate_pydantic(response_model, messages, config, stop, **kwargs)
93
- return self.client.invoke(input=messages, config=config, stop=stop, **kwargs).text()
66
@classmethod
def from_provider(
    cls,
    provider_and_model: str,
    structured_output_kwargs: Optional[dict[str, Any]] = {"strict": True},
) -> Self:
    """Build an instance from a ``"<provider>:<model>"`` specification.

    Args:
        provider_and_model: Provider name and model name joined by a colon.
            Only the first ``":"`` separates the two parts, so the model
            name may itself contain colons. Supported providers are
            ``openai``, ``anthropic``, ``google``, ``ollama`` and
            ``openrouter``.
        structured_output_kwargs: Forwarded to the provider-specific
            constructor. ``None`` is forwarded as ``None``.

    Returns:
        The instance produced by the matching provider constructor.

    Raises:
        ValueError: If the specification contains no ``":"`` separator, or
            if the provider part is not one of the supported names.
    """
    backend, separator, model = provider_and_model.partition(":")
    if not separator:
        # Previously this surfaced as an opaque tuple-unpacking ValueError.
        raise ValueError(
            f"Expected '<provider>:<model>' but got {provider_and_model!r}"
        )

    # The signature default is one dict object shared by every call; pass a
    # copy so that no constructed instance can end up holding (and mutating)
    # that shared default.
    if structured_output_kwargs is not None:
        structured_output_kwargs = dict(structured_output_kwargs)

    if backend == "openai":
        return cls.openai(model=model, structured_output_kwargs=structured_output_kwargs)
    if backend == "anthropic":
        # The anthropic constructor names its model parameter ``model_name``.
        return cls.anthropic(model_name=model, structured_output_kwargs=structured_output_kwargs)
    if backend == "google":
        return cls.google(model=model, structured_output_kwargs=structured_output_kwargs)
    if backend == "ollama":
        return cls.ollama(model=model, structured_output_kwargs=structured_output_kwargs)
    if backend == "openrouter":
        return cls.open_router(model=model, structured_output_kwargs=structured_output_kwargs)
    raise ValueError(f"Unsupported backend model: {backend}")
94
85
 
95
86
  @classmethod
96
87
  def openai(
@@ -141,6 +132,78 @@ class Chatterer(BaseModel):
141
132
  structured_output_kwargs=structured_output_kwargs or {},
142
133
  )
143
134
 
135
@classmethod
def open_router(
    cls,
    model: str = "openrouter/quasar-alpha",
    structured_output_kwargs: Optional[dict[str, Any]] = None,
) -> Self:
    """Create an instance whose client talks to the OpenRouter endpoint.

    The client is a ``ChatOpenAI`` pointed at ``https://openrouter.ai/api/v1``.
    Its API key is looked up from the ``OPENROUTER_API_KEY`` environment
    variable through ``secret_from_env`` with ``default=None``.

    Args:
        model: Model identifier passed to ``ChatOpenAI``.
        structured_output_kwargs: Stored on the instance; a falsy value is
            replaced by an empty dict.

    Returns:
        The newly constructed instance.
    """
    # Imported lazily so langchain_openai is only required when this
    # constructor is actually used.
    from langchain_openai import ChatOpenAI

    openrouter_key = secret_from_env("OPENROUTER_API_KEY", default=None)()
    openrouter_client = ChatOpenAI(
        model=model,
        base_url="https://openrouter.ai/api/v1",
        api_key=openrouter_key,
    )
    return cls(
        client=openrouter_client,
        structured_output_kwargs=structured_output_kwargs or {},
    )
151
+
152
@property
def invoke(self):
    """Expose the wrapped client's ``invoke`` attribute unchanged."""
    wrapped = self.client
    return wrapped.invoke
155
+
156
@property
def ainvoke(self):
    """Expose the wrapped client's ``ainvoke`` attribute unchanged."""
    wrapped = self.client
    return wrapped.ainvoke
159
+
160
@property
def stream(self):
    """Expose the wrapped client's ``stream`` attribute unchanged."""
    wrapped = self.client
    return wrapped.stream
163
+
164
@property
def astream(self):
    """Expose the wrapped client's ``astream`` attribute unchanged."""
    wrapped = self.client
    return wrapped.astream
167
+
168
@property
def bind_tools(self):  # pyright: ignore[reportUnknownParameterType]
    """Expose the wrapped client's ``bind_tools`` attribute unchanged."""
    wrapped = self.client
    return wrapped.bind_tools  # pyright: ignore[reportUnknownParameterType, reportUnknownVariableType, reportUnknownMemberType]
171
+
172
def __getattr__(self, name: str) -> Any:
    """Forward attribute lookups that fail on this object to the client.

    Python only calls ``__getattr__`` after normal attribute lookup has
    failed, so attributes that exist on this object are never forwarded.

    Args:
        name: The attribute name that was not found on this object.

    Returns:
        The attribute of the same name on ``self.client``.

    Raises:
        AttributeError: If ``client`` itself is not set on this object, or
            if the client has no attribute called ``name``.
    """
    # Reaching this point with name == "client" means the client attribute is
    # not set (e.g. on a partially constructed instance). Reading
    # ``self.client`` below would then re-enter this method without bound and
    # end in RecursionError; report a normal AttributeError instead so that
    # hasattr()/getattr(..., default) behave as expected.
    if name == "client":
        raise AttributeError(
            f"{type(self).__name__!r} object has no attribute {name!r}"
        )
    return getattr(self.client, name)
174
+
175
@overload
def __call__(
    self,
    messages: LanguageModelInput,
    response_model: Type[PydanticModelT],
    config: Optional[RunnableConfig] = None,
    stop: Optional[list[str]] = None,
    **kwargs: Any,
) -> PydanticModelT: ...

@overload
def __call__(
    self,
    messages: LanguageModelInput,
    response_model: None = None,
    config: Optional[RunnableConfig] = None,
    stop: Optional[list[str]] = None,
    **kwargs: Any,
) -> str: ...

def __call__(
    self,
    messages: LanguageModelInput,
    response_model: Optional[Type[PydanticModelT]] = None,
    config: Optional[RunnableConfig] = None,
    stop: Optional[list[str]] = None,
    **kwargs: Any,
) -> str | PydanticModelT:
    """Generate either plain text or a structured object from ``messages``.

    Args:
        messages: Input handed to the underlying model.
        response_model: When truthy, the call is delegated to
            ``generate_pydantic`` with this model; otherwise the client is
            invoked directly and the text of its reply is returned.
        config: Optional runnable configuration, passed through.
        stop: Optional stop sequences, passed through.
        **kwargs: Extra keyword arguments, passed through.

    Returns:
        The result of ``generate_pydantic`` when ``response_model`` is
        truthy, else the ``.text()`` of the client's reply.
    """
    if not response_model:
        reply = self.client.invoke(input=messages, config=config, stop=stop, **kwargs)
        return reply.text()
    return self.generate_pydantic(response_model, messages, config, stop, **kwargs)
206
+
144
207
  def generate(
145
208
  self,
146
209
  messages: LanguageModelInput,
@@ -279,39 +342,30 @@ class Chatterer(BaseModel):
279
342
  )
280
343
  ])
281
344
 
282
- @staticmethod
283
- def get_num_tokens_from_message(message: BaseMessage) -> Optional[tuple[int, int]]:
284
- try:
285
- if isinstance(message, AIMessage) and (usage_metadata := message.usage_metadata):
286
- input_tokens = int(usage_metadata["input_tokens"])
287
- output_tokens = int(usage_metadata["output_tokens"])
345
def get_approximate_token_count(self, message: BaseMessage) -> int:
    """Return the client's token count for a single message.

    The message is wrapped in a one-element list and handed to the client's
    ``get_num_tokens_from_messages``.

    Args:
        message: The message to count tokens for.

    Returns:
        Whatever count the client reports for that one-message list.
    """
    single_message_batch = [message]
    return self.client.get_num_tokens_from_messages(single_message_batch)  # pyright: ignore[reportUnknownMemberType]
347
+
348
+ def get_usage_metadata(self, message: BaseMessage) -> UsageMetadata:
349
+ if isinstance(message, AIMessage):
350
+ usage_metadata = message.usage_metadata
351
+ if usage_metadata is not None:
352
+ input_tokens = usage_metadata["input_tokens"]
353
+ output_tokens = usage_metadata["output_tokens"]
354
+ return {
355
+ "input_tokens": input_tokens,
356
+ "output_tokens": output_tokens,
357
+ "total_tokens": input_tokens + output_tokens,
358
+ }
288
359
  else:
289
- # Dynamic extraction for unknown structures
290
- input_tokens: Optional[int] = None
291
- output_tokens: Optional[int] = None
292
-
293
- def _find_tokens(obj: object) -> None:
294
- nonlocal input_tokens, output_tokens
295
- if isinstance(obj, dict):
296
- for key, value in cast(dict[object, object], obj).items():
297
- if isinstance(value, int):
298
- if "input" in str(key) or "prompt" in str(key):
299
- input_tokens = value
300
- elif "output" in str(key) or "completion" in str(key):
301
- output_tokens = value
302
- else:
303
- _find_tokens(value)
304
- elif isinstance(obj, list):
305
- for item in cast(list[object], obj):
306
- _find_tokens(item)
307
-
308
- _find_tokens(message.model_dump())
309
-
310
- if input_tokens is None or output_tokens is None:
311
- return None
312
- return input_tokens, output_tokens
313
- except Exception:
314
- return None
360
+ approx_tokens = self.get_approximate_token_count(message)
361
+ return {"input_tokens": 0, "output_tokens": approx_tokens, "total_tokens": approx_tokens}
362
+ else:
363
+ approx_tokens = self.get_approximate_token_count(message)
364
+ return {
365
+ "input_tokens": approx_tokens,
366
+ "output_tokens": 0,
367
+ "total_tokens": approx_tokens,
368
+ }
315
369
 
316
370
  def invoke_code_execution(
317
371
  self,
@@ -382,6 +436,15 @@ class Chatterer(BaseModel):
382
436
  class PythonCodeToExecute(BaseModel):
383
437
  code: str = Field(description="Python code to execute")
384
438
 
439
def model_post_init(self, context: object) -> None:
    """Unwrap fenced code blocks found in ``code`` after initialization.

    Every span captured by ``PYTHON_CODE_PATTERN`` (the text between a pair
    of triple-backtick fences, with an optional leading ``python`` tag) is
    collected, and the captures are joined with newlines to replace
    ``code``. When the pattern matches nothing, ``code`` is left untouched.

    Args:
        context: Passed straight through to the parent implementation.
    """
    super().model_post_init(context)

    # The pattern has exactly one capturing group, so findall yields the
    # captured snippet strings directly, in order of appearance.
    fenced_snippets: list[str] = PYTHON_CODE_PATTERN.findall(self.code)
    if not fenced_snippets:
        return
    self.code = "\n".join(fenced_snippets)
447
+
385
448
 
386
449
  def _with_structured_output(
387
450
  client: BaseChatModel,
@@ -389,189 +452,3 @@ def _with_structured_output(
389
452
  structured_output_kwargs: dict[str, Any],
390
453
  ) -> Runnable[LanguageModelInput, dict[object, object] | BaseModel]:
391
454
  return client.with_structured_output(schema=response_model, **structured_output_kwargs) # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
392
-
393
-
394
- # def _add_message_last(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
395
- # if isinstance(messages, str):
396
- # messages += f"\n{prompt_to_add}"
397
- # elif isinstance(messages, Sequence):
398
- # messages = list(messages)
399
- # messages.append(SystemMessage(content=prompt_to_add))
400
- # else:
401
- # messages = messages.to_messages()
402
- # messages.append(SystemMessage(content=prompt_to_add))
403
- # return messages
404
-
405
-
406
- def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
407
- if isinstance(messages, str):
408
- messages = f"{prompt_to_add}\n{messages}"
409
- elif isinstance(messages, Sequence):
410
- messages = list(messages)
411
- messages.insert(0, SystemMessage(content=prompt_to_add))
412
- else:
413
- messages = messages.to_messages()
414
- messages.insert(0, SystemMessage(content=prompt_to_add))
415
- return messages
416
-
417
-
418
- def augment_prompt_for_toolcall(
419
- function_signatures: Iterable[FunctionSignature],
420
- messages: LanguageModelInput,
421
- prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
422
- function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
423
- function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
424
- ) -> LanguageModelInput:
425
- if function_signatures:
426
- messages = _add_message_first(
427
- messages=messages,
428
- prompt_to_add=FunctionSignature.as_prompt(
429
- function_signatures, function_reference_prefix, function_reference_seperator
430
- ),
431
- )
432
- if prompt_for_code_invoke:
433
- messages = _add_message_first(messages=messages, prompt_to_add=prompt_for_code_invoke)
434
- return messages
435
-
436
-
437
- def interactive_shell(
438
- chatterer: Chatterer = Chatterer.openai(),
439
- system_instruction: BaseMessage | Iterable[BaseMessage] = ([
440
- SystemMessage("You are an AI that can answer questions and execute Python code."),
441
- ]),
442
- repl_tool: Optional["PythonAstREPLTool"] = None,
443
- prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
444
- additional_callables: Optional[Callable[..., object] | Sequence[Callable[..., object]]] = None,
445
- function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
446
- function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
447
- config: Optional[RunnableConfig] = None,
448
- stop: Optional[list[str]] = None,
449
- **kwargs: Any,
450
- ) -> None:
451
- from rich.console import Console
452
- from rich.prompt import Prompt
453
-
454
- # 코드 실행 필요 여부를 판단하는 모델
455
- class IsCodeExecutionNeeded(BaseModel):
456
- is_code_execution_needed: bool = Field(
457
- description="Whether Python tool calling is needed to answer user query."
458
- )
459
-
460
- # 추가 코드 실행 필요 여부를 판단하는 모델
461
- class IsFurtherCodeExecutionNeeded(BaseModel):
462
- review_on_code_execution: str = Field(description="Review on the code execution.")
463
- next_action: str = Field(description="Next action to take.")
464
- is_further_code_execution_needed: bool = Field(
465
- description="Whether further Python tool calling is needed to answer user query."
466
- )
467
-
468
- def respond(messages: list[BaseMessage]) -> str:
469
- # AI 응답 스트리밍 출력
470
- console.print("[bold blue]AI:[/bold blue] ", end="")
471
- response = ""
472
- for chunk in chatterer.generate_stream(messages=messages):
473
- response += chunk
474
- console.print(chunk, end="")
475
- console.print() # 응답 후 줄바꿈 추가
476
- return response.strip()
477
-
478
- def code_session_returning_end_of_turn() -> bool:
479
- code_session_messages: list[BaseMessage] = []
480
- while True:
481
- code_execution: CodeExecutionResult = chatterer.invoke_code_execution(
482
- messages=context,
483
- repl_tool=repl_tool,
484
- prompt_for_code_invoke=prompt_for_code_invoke,
485
- function_signatures=function_signatures,
486
- function_reference_prefix=function_reference_prefix,
487
- function_reference_seperator=function_reference_seperator,
488
- config=config,
489
- stop=stop,
490
- **kwargs,
491
- )
492
- if code_execution.code.strip() in ("", "quit", "exit", "pass"):
493
- return False
494
-
495
- last_tool_use_message = AIMessage(
496
- content=f"Executed code:\n```python\n{code_execution.code}\n```\nOutput:\n{code_execution.output}".strip()
497
- )
498
- code_session_messages.append(last_tool_use_message)
499
- console.print("[bold yellow]Executed code:[/bold yellow]")
500
- console.print(f"[code]{code_execution.code}[/code]")
501
- console.print("[bold yellow]Output:[/bold yellow]")
502
- console.print(code_execution.output)
503
-
504
- decision = chatterer.generate_pydantic(
505
- response_model=IsFurtherCodeExecutionNeeded,
506
- messages=augment_prompt_for_toolcall(
507
- function_signatures=function_signatures,
508
- messages=context + code_session_messages,
509
- prompt_for_code_invoke=prompt_for_code_invoke,
510
- function_reference_prefix=function_reference_prefix,
511
- function_reference_seperator=function_reference_seperator,
512
- ),
513
- )
514
- review_on_code_execution = decision.review_on_code_execution.strip()
515
- next_action = decision.next_action.strip()
516
- console.print("[bold blue]AI:[/bold blue]")
517
- console.print(f"-[bold yellow]Review on code execution:[/bold yellow] {review_on_code_execution}")
518
- console.print(f"-[bold yellow]Next Action:[/bold yellow] {next_action}")
519
- code_session_messages.append(
520
- AIMessage(
521
- content=f"- Review upon code execution: {review_on_code_execution}\n- Next Action: {next_action}".strip()
522
- )
523
- )
524
- if not decision.is_further_code_execution_needed:
525
- response: str = respond(context + code_session_messages)
526
- context.append(last_tool_use_message)
527
- context.append(AIMessage(content=response))
528
- return True
529
-
530
- # REPL 도구 초기화
531
- if repl_tool is None:
532
- repl_tool = get_default_repl_tool()
533
-
534
- function_signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)
535
- console = Console()
536
- context: list[BaseMessage] = []
537
- if system_instruction:
538
- if isinstance(system_instruction, BaseMessage):
539
- context.append(system_instruction)
540
- else:
541
- context.extend(system_instruction)
542
-
543
- # 환영 메시지
544
- console.print("[bold blue]Welcome to the Interactive Chatterer Shell![/bold blue]")
545
- console.print("Type 'quit' or 'exit' to end the conversation.")
546
-
547
- while True:
548
- # 사용자 입력 받기
549
- user_input = Prompt.ask("[bold green]You[/bold green]")
550
- if user_input.lower() in ["quit", "exit"]:
551
- console.print("[bold blue]Goodbye![/bold blue]")
552
- break
553
-
554
- context.append(HumanMessage(content=user_input))
555
-
556
- # 코드 실행 필요 여부 판단
557
- decision = chatterer.generate_pydantic(
558
- response_model=IsCodeExecutionNeeded,
559
- messages=augment_prompt_for_toolcall(
560
- function_signatures=function_signatures,
561
- messages=context,
562
- prompt_for_code_invoke=prompt_for_code_invoke,
563
- function_reference_prefix=function_reference_prefix,
564
- function_reference_seperator=function_reference_seperator,
565
- ),
566
- )
567
-
568
- # 코드 실행 처리
569
- if decision.is_code_execution_needed and code_session_returning_end_of_turn():
570
- continue
571
-
572
- # AI 응답 스트리밍 출력
573
- context.append(AIMessage(content=respond(context)))
574
-
575
-
576
- if __name__ == "__main__":
577
- interactive_shell()
chatterer/messages.py CHANGED
@@ -1,4 +1,13 @@
1
- from langchain_core.messages import AIMessage, BaseMessage, FunctionMessage, HumanMessage, SystemMessage
1
+ from langchain_core.language_models.base import LanguageModelInput
2
+ from langchain_core.messages import (
3
+ AIMessage,
4
+ BaseMessage,
5
+ BaseMessageChunk,
6
+ FunctionMessage,
7
+ HumanMessage,
8
+ SystemMessage,
9
+ )
10
+ from langchain_core.messages.ai import UsageMetadata
2
11
 
3
12
  __all__ = [
4
13
  "AIMessage",
@@ -6,4 +15,7 @@ __all__ = [
6
15
  "HumanMessage",
7
16
  "SystemMessage",
8
17
  "FunctionMessage",
18
+ "BaseMessageChunk",
19
+ "UsageMetadata",
20
+ "LanguageModelInput",
9
21
  ]
@@ -1,19 +1,23 @@
1
+ from .caption_markdown_images import MarkdownLink, acaption_markdown_images, caption_markdown_images
1
2
  from .citation_chunking import citation_chunker
3
+ from .convert_pdf_to_markdown import PdfToMarkdown, extract_text_from_pdf, open_pdf, render_pdf_as_image
2
4
  from .convert_to_text import (
5
+ CodeSnippets,
3
6
  anything_to_markdown,
4
7
  get_default_html_to_markdown_options,
5
8
  html_to_markdown,
6
9
  pdf_to_text,
7
10
  pyscripts_to_snippets,
8
11
  )
9
- from .youtube import get_youtube_video_subtitle, get_youtube_video_details
10
-
11
-
12
- def init_webpage_to_markdown():
13
- from . import webpage_to_markdown
14
-
15
- return webpage_to_markdown
16
-
12
+ from .upstage_document_parser import UpstageDocumentParseParser
13
+ from .webpage_to_markdown import (
14
+ PlayWrightBot,
15
+ PlaywrightLaunchOptions,
16
+ PlaywrightOptions,
17
+ PlaywrightPersistencyOptions,
18
+ get_default_playwright_launch_options,
19
+ )
20
+ from .youtube import get_youtube_video_details, get_youtube_video_subtitle
17
21
 
18
22
  __all__ = [
19
23
  "html_to_markdown",
@@ -22,7 +26,21 @@ __all__ = [
22
26
  "get_default_html_to_markdown_options",
23
27
  "pyscripts_to_snippets",
24
28
  "citation_chunker",
25
- "init_webpage_to_markdown",
29
+ "webpage_to_markdown",
26
30
  "get_youtube_video_subtitle",
27
31
  "get_youtube_video_details",
32
+ "CodeSnippets",
33
+ "PlayWrightBot",
34
+ "PlaywrightLaunchOptions",
35
+ "PlaywrightOptions",
36
+ "PlaywrightPersistencyOptions",
37
+ "get_default_playwright_launch_options",
38
+ "UpstageDocumentParseParser",
39
+ "acaption_markdown_images",
40
+ "caption_markdown_images",
41
+ "MarkdownLink",
42
+ "PdfToMarkdown",
43
+ "extract_text_from_pdf",
44
+ "open_pdf",
45
+ "render_pdf_as_image",
28
46
  ]