chatterer 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chatterer/__init__.py CHANGED
@@ -1,4 +1,4 @@
1
- from .language_model import Chatterer
1
+ from .language_model import Chatterer, interactive_shell
2
2
  from .messages import (
3
3
  AIMessage,
4
4
  BaseMessage,
@@ -16,10 +16,12 @@ from .tools import (
16
16
  anything_to_markdown,
17
17
  citation_chunker,
18
18
  get_default_html_to_markdown_options,
19
+ get_youtube_video_subtitle,
19
20
  html_to_markdown,
20
21
  init_webpage_to_markdown,
21
22
  pdf_to_text,
22
23
  pyscripts_to_snippets,
24
+ get_youtube_video_details,
23
25
  )
24
26
  from .utils import (
25
27
  Base64Image,
@@ -52,4 +54,7 @@ __all__ = [
52
54
  "CodeExecutionResult",
53
55
  "get_default_repl_tool",
54
56
  "insert_callables_into_global",
57
+ "get_youtube_video_subtitle",
58
+ "get_youtube_video_details",
59
+ "interactive_shell",
55
60
  ]
@@ -22,7 +22,7 @@ from langchain_core.runnables.config import RunnableConfig
22
22
  from pydantic import BaseModel, Field
23
23
 
24
24
  from .messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
25
- from .utils.code_agent import CodeExecutionResult, FunctionSignature
25
+ from .utils.code_agent import CodeExecutionResult, FunctionSignature, get_default_repl_tool
26
26
 
27
27
  if TYPE_CHECKING:
28
28
  from instructor import Partial
@@ -35,13 +35,11 @@ DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION = "Provide a detailed description of all v
35
35
  DEFAULT_CODE_GENERATION_PROMPT = (
36
36
  "You are utilizing a Python code execution tool now.\n"
37
37
  "Your goal is to generate Python code that solves the task efficiently and appends both the code and its output to your context memory.\n"
38
- "Since your context window is highly limited, type `pass` if no code execution is needed.\n"
39
38
  "\n"
40
39
  "To optimize tool efficiency, follow these guidelines:\n"
41
40
  "- Write concise, efficient code that directly serves the intended purpose.\n"
42
41
  "- Avoid unnecessary operations (e.g., excessive loops, recursion, or heavy computations).\n"
43
42
  "- Handle potential errors gracefully (e.g., using try-except blocks).\n"
44
- "- Prevent excessive output by limiting print statements to essential information only (e.g., avoid printing large datasets).\n"
45
43
  "\n"
46
44
  "Return your response strictly in the following JSON format:\n"
47
45
  '{\n "code": "<your_python_code_here>"\n}\n\n'
@@ -189,7 +187,7 @@ class Chatterer(BaseModel):
189
187
  stop: Optional[list[str]] = None,
190
188
  **kwargs: Any,
191
189
  ) -> PydanticModelT:
192
- result: StructuredOutputType = with_structured_output(
190
+ result: StructuredOutputType = _with_structured_output(
193
191
  client=self.client,
194
192
  response_model=response_model,
195
193
  structured_output_kwargs=self.structured_output_kwargs,
@@ -207,7 +205,7 @@ class Chatterer(BaseModel):
207
205
  stop: Optional[list[str]] = None,
208
206
  **kwargs: Any,
209
207
  ) -> PydanticModelT:
210
- result: StructuredOutputType = await with_structured_output(
208
+ result: StructuredOutputType = await _with_structured_output(
211
209
  client=self.client,
212
210
  response_model=response_model,
213
211
  structured_output_kwargs=self.structured_output_kwargs,
@@ -231,7 +229,7 @@ class Chatterer(BaseModel):
231
229
  raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
232
230
 
233
231
  partial_response_model = instructor.Partial[response_model]
234
- for chunk in with_structured_output(
232
+ for chunk in _with_structured_output(
235
233
  client=self.client,
236
234
  response_model=partial_response_model,
237
235
  structured_output_kwargs=self.structured_output_kwargs,
@@ -252,7 +250,7 @@ class Chatterer(BaseModel):
252
250
  raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
253
251
 
254
252
  partial_response_model = instructor.Partial[response_model]
255
- async for chunk in with_structured_output(
253
+ async for chunk in _with_structured_output(
256
254
  client=self.client,
257
255
  response_model=partial_response_model,
258
256
  structured_output_kwargs=self.structured_output_kwargs,
@@ -320,26 +318,24 @@ class Chatterer(BaseModel):
320
318
  messages: LanguageModelInput,
321
319
  repl_tool: Optional["PythonAstREPLTool"] = None,
322
320
  prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
323
- additional_callables: Optional[Callable[..., object] | Sequence[Callable[..., object]]] = None,
321
+ function_signatures: Optional[FunctionSignature | Iterable[FunctionSignature]] = None,
324
322
  function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
325
323
  function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
326
324
  config: Optional[RunnableConfig] = None,
327
325
  stop: Optional[list[str]] = None,
328
326
  **kwargs: Any,
329
327
  ) -> CodeExecutionResult:
330
- function_signatures: Optional[list[FunctionSignature]] = None
331
- if additional_callables:
332
- if not isinstance(additional_callables, Iterable):
333
- additional_callables = (additional_callables,)
334
- function_signatures = FunctionSignature.from_callables(additional_callables)
335
- messages = _add_message_last(
336
- messages=messages,
337
- prompt_to_add=FunctionSignature.as_prompt(
338
- function_signatures, function_reference_prefix, function_reference_seperator
339
- ),
340
- )
341
- if prompt_for_code_invoke:
342
- messages = _add_message_last(messages=messages, prompt_to_add=prompt_for_code_invoke)
328
+ if not function_signatures:
329
+ function_signatures = []
330
+ elif isinstance(function_signatures, FunctionSignature):
331
+ function_signatures = [function_signatures]
332
+ messages = augment_prompt_for_toolcall(
333
+ function_signatures=function_signatures,
334
+ messages=messages,
335
+ prompt_for_code_invoke=prompt_for_code_invoke,
336
+ function_reference_prefix=function_reference_prefix,
337
+ function_reference_seperator=function_reference_seperator,
338
+ )
343
339
  code_obj: PythonCodeToExecute = self.generate_pydantic(
344
340
  response_model=PythonCodeToExecute, messages=messages, config=config, stop=stop, **kwargs
345
341
  )
@@ -363,19 +359,14 @@ class Chatterer(BaseModel):
363
359
  stop: Optional[list[str]] = None,
364
360
  **kwargs: Any,
365
361
  ) -> CodeExecutionResult:
366
- function_signatures: Optional[list[FunctionSignature]] = None
367
- if additional_callables:
368
- if not isinstance(additional_callables, Iterable):
369
- additional_callables = (additional_callables,)
370
- function_signatures = FunctionSignature.from_callables(additional_callables)
371
- messages = _add_message_last(
372
- messages=messages,
373
- prompt_to_add=FunctionSignature.as_prompt(
374
- function_signatures, function_reference_prefix, function_reference_seperator
375
- ),
376
- )
377
- if prompt_for_code_invoke:
378
- messages = _add_message_last(messages=messages, prompt_to_add=prompt_for_code_invoke)
362
+ function_signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)
363
+ messages = augment_prompt_for_toolcall(
364
+ function_signatures=function_signatures,
365
+ messages=messages,
366
+ prompt_for_code_invoke=prompt_for_code_invoke,
367
+ function_reference_prefix=function_reference_prefix,
368
+ function_reference_seperator=function_reference_seperator,
369
+ )
379
370
  code_obj: PythonCodeToExecute = await self.agenerate_pydantic(
380
371
  response_model=PythonCodeToExecute, messages=messages, config=config, stop=stop, **kwargs
381
372
  )
@@ -392,7 +383,7 @@ class PythonCodeToExecute(BaseModel):
392
383
  code: str = Field(description="Python code to execute")
393
384
 
394
385
 
395
- def with_structured_output(
386
+ def _with_structured_output(
396
387
  client: BaseChatModel,
397
388
  response_model: Type["PydanticModelT | Partial[PydanticModelT]"],
398
389
  structured_output_kwargs: dict[str, Any],
@@ -400,93 +391,191 @@ def with_structured_output(
400
391
  return client.with_structured_output(schema=response_model, **structured_output_kwargs) # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
401
392
 
402
393
 
403
- def _add_message_last(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
394
+ # def _add_message_last(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
395
+ # if isinstance(messages, str):
396
+ # messages += f"\n{prompt_to_add}"
397
+ # elif isinstance(messages, Sequence):
398
+ # messages = list(messages)
399
+ # messages.append(SystemMessage(content=prompt_to_add))
400
+ # else:
401
+ # messages = messages.to_messages()
402
+ # messages.append(SystemMessage(content=prompt_to_add))
403
+ # return messages
404
+
405
+
406
+ def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
404
407
  if isinstance(messages, str):
405
- messages += f"\n{prompt_to_add}"
408
+ messages = f"{prompt_to_add}\n{messages}"
406
409
  elif isinstance(messages, Sequence):
407
410
  messages = list(messages)
408
- messages.append(SystemMessage(content=prompt_to_add))
411
+ messages.insert(0, SystemMessage(content=prompt_to_add))
409
412
  else:
410
413
  messages = messages.to_messages()
411
- messages.append(SystemMessage(content=prompt_to_add))
414
+ messages.insert(0, SystemMessage(content=prompt_to_add))
412
415
  return messages
413
416
 
414
417
 
415
- # def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
416
- # if isinstance(messages, str):
417
- # messages = f"{prompt_to_add}\n{messages}"
418
- # elif isinstance(messages, Sequence):
419
- # messages = list(messages)
420
- # messages.insert(0, SystemMessage(content=prompt_to_add))
421
- # else:
422
- # messages = messages.to_messages()
423
- # messages.insert(0, SystemMessage(content=prompt_to_add))
424
- # return messages
425
-
418
def augment_prompt_for_toolcall(
    function_signatures: Iterable[FunctionSignature],
    messages: LanguageModelInput,
    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
) -> LanguageModelInput:
    """Prepend the tool-calling prompts to ``messages``.

    Args:
        function_signatures: Signatures of the pre-made functions to advertise.
            Any iterable is accepted; it is consumed exactly once.
        messages: The conversation to augment.
        prompt_for_code_invoke: Code-generation instruction to prepend; skipped
            when empty or ``None``.
        function_reference_prefix: Prefix passed to ``FunctionSignature.as_prompt``.
        function_reference_seperator: Separator passed to ``FunctionSignature.as_prompt``.

    Returns:
        The augmented input. Because each prompt is inserted at the front, the
        final order is: code-generation prompt, function reference, original
        messages.
    """
    # Materialize first: a generator is always truthy, so testing the raw
    # iterable would emit a prefix-only prompt for an empty generator.
    signatures = list(function_signatures)
    if signatures:
        messages = _add_message_first(
            messages=messages,
            prompt_to_add=FunctionSignature.as_prompt(
                signatures, function_reference_prefix, function_reference_seperator
            ),
        )
    if prompt_for_code_invoke:
        messages = _add_message_first(messages=messages, prompt_to_add=prompt_for_code_invoke)
    return messages
426
435
 
427
- def chatbot_example(chatterer: Chatterer = Chatterer.openai()) -> None:
428
- # Define the CodeExecutionDecision class using Pydantic
429
436
 
437
def interactive_shell(
    chatterer: Chatterer = Chatterer.openai(),
    system_instruction: BaseMessage | Iterable[BaseMessage] = ([
        SystemMessage("You are an AI that can answer questions and execute Python code."),
    ]),
    repl_tool: Optional["PythonAstREPLTool"] = None,
    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
    additional_callables: Optional[Callable[..., object] | Sequence[Callable[..., object]]] = None,
    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
    config: Optional[RunnableConfig] = None,
    stop: Optional[list[str]] = None,
    **kwargs: Any,
) -> None:
    """Run an interactive terminal chat that can execute Python code.

    Each turn: read user input, ask the model whether code execution is needed,
    optionally run one or more rounds of generated code (the model reviews each
    result and decides whether another round is required), then stream the
    final answer. Typing ``quit`` or ``exit`` ends the session.

    Args:
        chatterer: Model wrapper used for every generation call.
        system_instruction: Message(s) that seed the conversation context.
        repl_tool: REPL used to execute code; a default one is created when ``None``.
        prompt_for_code_invoke: Code-generation instruction given to the model.
        additional_callables: Functions made available to the generated code.
        function_reference_prefix: Prefix of the function reference prompt.
        function_reference_seperator: Separator of the function reference prompt.
        config: Forwarded to ``invoke_code_execution``.
        stop: Forwarded to ``invoke_code_execution``.
        **kwargs: Forwarded to ``invoke_code_execution``.
    """
    from rich.console import Console
    from rich.prompt import Prompt

    # Structured answer: does this user query need code execution at all?
    class IsCodeExecutionNeeded(BaseModel):
        is_code_execution_needed: bool = Field(
            description="Whether Python tool calling is needed to answer user query."
        )

    # Structured answer: after an execution, is another round needed?
    class IsFurtherCodeExecutionNeeded(BaseModel):
        review_on_code_execution: str = Field(description="Review on the code execution.")
        next_action: str = Field(description="Next action to take.")
        is_further_code_execution_needed: bool = Field(
            description="Whether further Python tool calling is needed to answer user query."
        )

    # Initialize the REPL tool once so state persists across turns.
    if repl_tool is None:
        repl_tool = get_default_repl_tool()

    function_signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)
    console = Console()
    context: list[BaseMessage] = []
    if system_instruction:
        if isinstance(system_instruction, BaseMessage):
            context.append(system_instruction)
        else:
            context.extend(system_instruction)

    def with_tool_prompts(history: list[BaseMessage]) -> LanguageModelInput:
        """Return ``history`` with the tool-calling prompts prepended."""
        return augment_prompt_for_toolcall(
            function_signatures=function_signatures,
            messages=history,
            prompt_for_code_invoke=prompt_for_code_invoke,
            function_reference_prefix=function_reference_prefix,
            function_reference_seperator=function_reference_seperator,
        )

    def run_code(history: list[BaseMessage]) -> CodeExecutionResult:
        """Ask the model for code based on ``history`` and execute it."""
        return chatterer.invoke_code_execution(
            messages=history,
            repl_tool=repl_tool,
            prompt_for_code_invoke=prompt_for_code_invoke,
            function_signatures=function_signatures,
            function_reference_prefix=function_reference_prefix,
            function_reference_seperator=function_reference_seperator,
            config=config,
            stop=stop,
            **kwargs,
        )

    # Welcome message.
    console.print("[bold blue]Welcome to the Interactive Chatterer Shell![/bold blue]")
    console.print("Type 'quit' or 'exit' to end the conversation.")

    while True:
        # Read user input.
        user_input = Prompt.ask("[bold green]You[/bold green]")
        if user_input.lower() in ["quit", "exit"]:
            console.print("[bold blue]Goodbye![/bold blue]")
            break

        context.append(HumanMessage(content=user_input))

        # Decide whether code execution is needed for this query.
        needs_code = chatterer.generate_pydantic(
            response_model=IsCodeExecutionNeeded,
            messages=with_tool_prompts(context),
        )

        # Most recent execution record; only this one is kept in the context.
        tool_use_message: Optional[AIMessage] = None
        if needs_code.is_code_execution_needed:
            code_result = run_code(context)
            code_session_messages: list[BaseMessage] = []
            # A bare `pass` means the model chose not to run anything.
            while code_result.code.strip() != "pass":
                tool_use_message = AIMessage(
                    content=f"Executed code:\n```python\n{code_result.code}\n```\nOutput:\n{code_result.output}".strip()
                )
                code_session_messages.append(tool_use_message)
                console.print("[bold yellow]Executed code:[/bold yellow]")
                console.print(f"[code]{code_result.code}[/code]")
                console.print("[bold yellow]Output:[/bold yellow]")
                console.print(code_result.output)

                review = chatterer.generate_pydantic(
                    response_model=IsFurtherCodeExecutionNeeded,
                    messages=with_tool_prompts(context + code_session_messages),
                )
                review_on_code_execution = review.review_on_code_execution.strip()
                next_action = review.next_action.strip()
                console.print("[bold blue]AI:[/bold blue]")
                console.print(f"-[bold yellow]Review on code execution:[/bold yellow] {review_on_code_execution}")
                console.print(f"-[bold yellow]Next Action:[/bold yellow] {next_action}")
                code_session_messages.append(
                    AIMessage(
                        content=f"- Review upon code execution: {review_on_code_execution}\n- Next Action: {next_action}".strip()
                    )
                )
                if not review.is_further_code_execution_needed:
                    break
                # Generate and run the next piece of code, informed by this
                # session's executions and reviews. Without this step the loop
                # would replay the same result forever.
                code_result = run_code(context + code_session_messages)

        # Add the code execution result to the context.
        if tool_use_message is not None:
            context.append(tool_use_message)

        # Stream the AI response to the console.
        console.print("[bold blue]AI:[/bold blue] ", end="")
        chunks: list[str] = []
        for chunk in chatterer.generate_stream(messages=context):
            chunks.append(chunk)
            console.print(chunk, end="")

        # Store the complete, whitespace-trimmed response in the context.
        context.append(AIMessage(content="".join(chunks).strip()))
        console.print()  # Line break after the streamed response.
489
578
 
490
579
 
491
580
  if __name__ == "__main__":
492
- chatbot_example()
581
+ interactive_shell()
@@ -6,6 +6,7 @@ from .convert_to_text import (
6
6
  pdf_to_text,
7
7
  pyscripts_to_snippets,
8
8
  )
9
+ from .youtube import get_youtube_video_subtitle, get_youtube_video_details
9
10
 
10
11
 
11
12
  def init_webpage_to_markdown():
@@ -22,4 +23,6 @@ __all__ = [
22
23
  "pyscripts_to_snippets",
23
24
  "citation_chunker",
24
25
  "init_webpage_to_markdown",
26
+ "get_youtube_video_subtitle",
27
+ "get_youtube_video_details",
25
28
  ]
@@ -0,0 +1,146 @@
1
+ import json
2
+ import unicodedata
3
+ import urllib.parse
4
+ from dataclasses import dataclass
5
+ from typing import Any, Optional, Self, cast
6
+
7
+ import requests
8
+
9
+
10
def get_youtube_video_details(
    query: str,
) -> list[dict[str, Optional[str]]]:
    """Search for video metadata on YouTube using the given query.

    Returns a list of dictionaries containing `video_id`, `title`, `channel`,
    `duration`, `views`, `publish_time`, and `long_desc`. At most 10 search
    results are considered, and results whose URL yields no video ID are
    left out.
    """
    details: list[dict[str, Optional[str]]] = []
    found_videos = YoutubeSearchResult.from_query(base_url="https://youtube.com", query=query, max_results=10)
    for found in found_videos:
        video_id = _get_video_id(found.url_suffix)
        if not video_id:
            continue
        details.append(
            {
                "video_id": video_id,
                "title": found.title,
                "channel": found.channel,
                "duration": found.duration,
                "views": found.views,
                "publish_time": found.publish_time,
                "long_desc": found.long_desc,
            }
        )
    return details
27
+
28
+
29
def get_youtube_video_subtitle(video_id: str) -> str:
    """Get the transcript of a YouTube video using the given video ID.

    The transcript is fetched in the language of the first transcript listed
    for the video. Caption fragments are grouped into lines of the form
    ``[<start>s] <text>``. A new line starts once a group holds 10 fragments,
    or when the incoming or the previous fragment begins with a non-letter
    character (for example ``[Music]``).

    Returns:
        The grouped transcript, one group per line, or ``""`` when the
        transcript has no non-empty fragments.
    """
    # Imported lazily so the module can be imported without this package.
    from youtube_transcript_api._api import YouTubeTranscriptApi

    get_transcript = YouTubeTranscriptApi.get_transcript  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    list_transcripts = YouTubeTranscriptApi.list_transcripts  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]

    language_code = next(iter(list_transcripts(video_id))).language_code  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]

    lines: list[str] = []
    buffer_timestamp: str = "0s"
    buffer_texts: list[str] = []
    for entry in get_transcript(video_id, languages=(language_code,)):  # pyright: ignore[reportUnknownVariableType]
        entry = cast(dict[object, object], entry)
        text: str = str(entry.get("text", "")).strip().replace("\n", " ")
        if not text:
            continue
        # Flush only a non-empty buffer; otherwise a leading non-letter
        # fragment would emit an empty "[0s] " line.
        if buffer_texts and (
            len(buffer_texts) >= 10 or _is_special_char(text) or _is_special_char(buffer_texts[-1])
        ):
            # NOTE(review): groups flushed here are joined with ". " while the
            # final group below uses " " — confirm which separator is intended.
            lines.append(f"[{buffer_timestamp}] {'. '.join(buffer_texts)}")
            buffer_texts = []
        if not buffer_texts:
            # Stamp each group with the start time of its first fragment.
            start = entry.get("start")
            if isinstance(start, (int, float)):
                buffer_timestamp = f"{start:.0f}s"
        buffer_texts.append(text)

    if buffer_texts:
        lines.append(f"[{buffer_timestamp}] {' '.join(buffer_texts)}")
    return "\n".join(lines)
57
+
58
+
59
def _get_video_id(suffix: str) -> str:
    """Extract the video ID from a YouTube URL or URL suffix.

    Two layouts are supported: ``/shorts/<id>``, where the ID is a path
    segment, and URLs carrying the ID in the ``v`` query parameter.

    Returns:
        The video ID, or ``""`` (after printing a notice) when none is found.
    """
    parsed = urllib.parse.urlparse(suffix)
    if parsed.path.startswith("/shorts/"):
        # "/shorts/<id>[/...]" splits into ["", "shorts", "<id>", ...], so
        # index 2 always exists; it is empty when the ID is missing.
        video_id = parsed.path.split("/")[2]
    else:
        candidates = urllib.parse.parse_qs(parsed.query).get("v", [])
        video_id = next(iter(candidates), "")

    if not video_id:
        print(f"Failed to get video ID from {suffix}")
    return video_id
75
+
76
+
77
def _is_special_char(text: str) -> bool:
    """Return True when ``text`` is non-empty and its first character is not a letter.

    A letter is any character whose Unicode general category starts with "L".
    """
    return bool(text) and unicodedata.category(text[0])[0] != "L"
81
+
82
+
83
+ @dataclass
84
+ class YoutubeSearchResult:
85
+ url_suffix: str
86
+ id: Optional[str]
87
+ thumbnails: list[str]
88
+ title: Optional[str]
89
+ long_desc: Optional[str]
90
+ channel: Optional[str]
91
+ duration: Optional[str]
92
+ views: Optional[str]
93
+ publish_time: Optional[str]
94
+
95
+ @classmethod
96
+ def from_query(cls, base_url: str, query: str, max_results: int) -> list[Self]:
97
+ url: str = f"{base_url}/results?search_query={urllib.parse.quote_plus(query)}"
98
+ response: str = requests.get(url).text
99
+ while "ytInitialData" not in response:
100
+ response = requests.get(url).text
101
+ results: list[Self] = cls.parse_html(response)
102
+ return results[:max_results]
103
+
104
+ @classmethod
105
+ def parse_html(cls, html: str) -> list[Self]:
106
+ results: list[Self] = []
107
+ start: int = html.index("ytInitialData") + len("ytInitialData") + 3
108
+ end: int = html.index("};", start) + 1
109
+ data: Any = json.loads(html[start:end])
110
+ for contents in data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"][
111
+ "contents"
112
+ ]:
113
+ for video in contents["itemSectionRenderer"]["contents"]:
114
+ if "videoRenderer" in video.keys():
115
+ video_data = video.get("videoRenderer", {})
116
+ suffix = (
117
+ video_data.get("navigationEndpoint", {})
118
+ .get("commandMetadata", {})
119
+ .get("webCommandMetadata", {})
120
+ .get("url", None)
121
+ )
122
+ if not suffix:
123
+ continue
124
+ res = cls(
125
+ id=video_data.get("videoId", None),
126
+ thumbnails=[
127
+ thumb.get("url", None) for thumb in video_data.get("thumbnail", {}).get("thumbnails", [{}])
128
+ ],
129
+ title=video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None),
130
+ long_desc=video_data.get("descriptionSnippet", {}).get("runs", [{}])[0].get("text", None),
131
+ channel=video_data.get("longBylineText", {}).get("runs", [[{}]])[0].get("text", None),
132
+ duration=video_data.get("lengthText", {}).get("simpleText", 0),
133
+ views=video_data.get("viewCountText", {}).get("simpleText", 0),
134
+ publish_time=video_data.get("publishedTimeText", {}).get("simpleText", 0),
135
+ url_suffix=suffix,
136
+ )
137
+ results.append(res)
138
+
139
+ if results:
140
+ break
141
+ return results
142
+
143
+
144
if __name__ == "__main__":
    # Manual smoke test; performs a live search request.
    print(get_youtube_video_details("BTS"))
    # print(get_youtube_video_subtitle("y7jrpS8GHxs"))
@@ -21,7 +21,15 @@ class FunctionSignature(NamedTuple):
21
21
  signature: str
22
22
 
23
23
  @classmethod
24
- def from_callable(cls, callable: Callable[..., object]) -> Self:
24
+ def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
25
+ if callables is None:
26
+ return []
27
+ if callable(callables):
28
+ return [cls._from_callable(callables)]
29
+ return [cls._from_callable(callable) for callable in callables]
30
+
31
+ @classmethod
32
+ def _from_callable(cls, callable: Callable[..., object]) -> Self:
25
33
  """
26
34
  Get the name and signature of a function as a string.
27
35
  """
@@ -54,21 +62,17 @@ class FunctionSignature(NamedTuple):
54
62
  else:
55
63
  return cls(name=function_name, callable=callable, signature=signature)
56
64
 
57
- @classmethod
58
- def from_callables(cls, callables: Iterable[Callable[..., object]]) -> list[Self]:
59
- return [cls.from_callable(callable) for callable in callables]
60
-
61
65
  @classmethod
62
66
  def as_prompt(
63
67
  cls,
64
- callables: Iterable[Self],
68
+ function_signatures: Iterable[Self],
65
69
  prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
66
70
  sep: str = "\n---\n",
67
71
  ) -> str:
68
72
  """
69
73
  Generate a prompt string from a list of callables.
70
74
  """
71
- body: str = sep.join(fsig.signature for fsig in callables)
75
+ body: str = sep.join(fsig.signature for fsig in function_signatures)
72
76
  if prefix:
73
77
  return f"{prefix}{body}"
74
78
  return body
@@ -92,7 +96,7 @@ class CodeExecutionResult(NamedTuple):
92
96
  """
93
97
  if repl_tool is None:
94
98
  repl_tool = get_default_repl_tool()
95
- if function_signatures is not None:
99
+ if function_signatures:
96
100
  insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
97
101
  output = str(repl_tool.invoke(code, config=config, **kwargs)) # pyright: ignore[reportUnknownMemberType]
98
102
  return cls(code=code, output=output)
@@ -111,7 +115,7 @@ class CodeExecutionResult(NamedTuple):
111
115
  """
112
116
  if repl_tool is None:
113
117
  repl_tool = get_default_repl_tool()
114
- if function_signatures is not None:
118
+ if function_signatures:
115
119
  insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
116
120
  output = str(await repl_tool.ainvoke(code, config=config, **kwargs)) # pyright: ignore[reportUnknownMemberType]
117
121
  return cls(code=code, output=output)
chatterer/utils/image.py CHANGED
@@ -64,7 +64,7 @@ class Base64Image(BaseModel):
64
64
 
65
65
  IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
66
66
  IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
67
- rf"data:image/({'|'.join(IMAGE_TYPES)});base64,[A-Za-z0-9+/]+={0, 2}$"
67
+ r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
68
68
  )
69
69
 
70
70
  def __hash__(self) -> int:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: chatterer
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: The highest-level interface for various LLM APIs.
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -18,6 +18,7 @@ Requires-Dist: pillow>=11.1.0; extra == "conversion"
18
18
  Requires-Dist: mistune>=3.1.2; extra == "conversion"
19
19
  Requires-Dist: markitdown>=0.0.2; extra == "conversion"
20
20
  Requires-Dist: pymupdf>=1.25.4; extra == "conversion"
21
+ Requires-Dist: youtube-transcript-api>=1.0.2; extra == "conversion"
21
22
  Provides-Extra: langchain
22
23
  Requires-Dist: chatterer[langchain-providers]; extra == "langchain"
23
24
  Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
@@ -1,12 +1,13 @@
1
- chatterer/__init__.py,sha256=QqW6ITeJ7Qpt42BvVzTgI8M6vKRYIlwSDa0nMO4NsV0,1209
2
- chatterer/language_model.py,sha256=J7_iLtfjr-0tNTejrY4_vLiEWGTnRGGUb_x9G2CZ-Vg,20083
1
+ chatterer/__init__.py,sha256=BPgCQ6VWGBXSh8xJr_0bpM0hcOOUz0KoxcKxOd9GYyI,1388
2
+ chatterer/language_model.py,sha256=DX_mU855JHHqE0gdnieWZNOwX1BjIO4VK4EightRL3w,24353
3
3
  chatterer/messages.py,sha256=OtbZ3two0LUQ4PXES97FDIBUSO3IcMHdFV1VFkDL2mI,229
4
4
  chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
6
6
  chatterer/strategies/atom_of_thoughts.py,sha256=CygOCLu5vLk-fzY9O-iE3qLShfjD7iY40ks9jH4ULBM,40872
7
7
  chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
8
- chatterer/tools/__init__.py,sha256=XomZMXHKhMyLprQkCfAKetI_uueHH184xWESjTKJeeA,560
8
+ chatterer/tools/__init__.py,sha256=hmWIuLJWotGQodL__i4LLbHdXe7Nl5uKHqNke9tHMro,705
9
9
  chatterer/tools/convert_to_text.py,sha256=kBqxCJ0IoiAw2eiPYqep_SPZm-TtYKF7mdACLsWQUuI,15915
10
+ chatterer/tools/youtube.py,sha256=GhyE05JBF_eos01A_N-X5tZv4wQJ--IjErBbEBeNBpQ,6037
10
11
  chatterer/tools/citation_chunking/__init__.py,sha256=gG7Fnkkp28UpcWMbfMY_4gqzZSZ8QzlhalHBoeoq7K0,82
11
12
  chatterer/tools/citation_chunking/chunks.py,sha256=50Dpa43RaYftlNox8tM1qI8htZ3_AJ9Uyyn02WsmxYk,2173
12
13
  chatterer/tools/citation_chunking/citation_chunker.py,sha256=yx5O9pUkowlNcFyyNf7f3sbq7-CV8AXOzFnviDldPR8,4894
@@ -18,9 +19,9 @@ chatterer/tools/webpage_to_markdown/__init__.py,sha256=bHH4qfnXyw8Zz-yBPLaTezF1s
18
19
  chatterer/tools/webpage_to_markdown/playwright_bot.py,sha256=yP0KixYZNQ4Kn_ZCFDI3mVyBD_DpUGfqgklpaGJUTCU,27496
19
20
  chatterer/tools/webpage_to_markdown/utils.py,sha256=ZLUU94imYciEdynD2K7Dmcsbt8BVQTaOP56Ba6DAFvk,12593
20
21
  chatterer/utils/__init__.py,sha256=8nzpFJKU_wSRPH6LBP6HRBotPMrSl_VO9UlmFprTrK0,334
21
- chatterer/utils/code_agent.py,sha256=phMMXHeZNkzHrySX72y50IW3-o2MOSriPV9IUPQd4nU,4973
22
- chatterer/utils/image.py,sha256=F3_D1677UDFlgp-UQBS_ChkNODzf_VOfjYNSUi02MaI,10852
23
- chatterer-0.1.9.dist-info/METADATA,sha256=GbjuJgbQJ09TEYt7lvI96Od5saTZ_y0KDzmybsW-0H0,4388
24
- chatterer-0.1.9.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
25
- chatterer-0.1.9.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
26
- chatterer-0.1.9.dist-info/RECORD,,
22
+ chatterer/utils/code_agent.py,sha256=UaWdeGzJMPzRSFy9yrxuveBJsvOPSa0te6OuE18bees,5143
23
+ chatterer/utils/image.py,sha256=1imiyq6TB9NIIGx3zAA2OwMWuXlifYIAjwfWRWa4WIM,10858
24
+ chatterer-0.1.11.dist-info/METADATA,sha256=S3hRkxG1DlFc_NGrra1xhniiCDDVoVrow2N96OJy8i0,4458
25
+ chatterer-0.1.11.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
26
+ chatterer-0.1.11.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
27
+ chatterer-0.1.11.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (77.0.3)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5