chatterer 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
chatterer/__init__.py CHANGED
@@ -1,7 +1,8 @@
-from .language_model import Chatterer
+from .language_model import Chatterer, interactive_shell
 from .messages import (
     AIMessage,
     BaseMessage,
+    FunctionMessage,
     HumanMessage,
     SystemMessage,
 )
@@ -15,9 +16,19 @@ from .tools import (
     anything_to_markdown,
     citation_chunker,
     get_default_html_to_markdown_options,
+    get_youtube_video_subtitle,
     html_to_markdown,
+    init_webpage_to_markdown,
     pdf_to_text,
     pyscripts_to_snippets,
+    get_youtube_video_details,
+)
+from .utils import (
+    Base64Image,
+    CodeExecutionResult,
+    FunctionSignature,
+    get_default_repl_tool,
+    insert_callables_into_global,
 )
 
 __all__ = [
@@ -36,4 +47,14 @@ __all__ = [
     "HumanMessage",
     "SystemMessage",
     "AIMessage",
+    "FunctionMessage",
+    "Base64Image",
+    "init_webpage_to_markdown",
+    "FunctionSignature",
+    "CodeExecutionResult",
+    "get_default_repl_tool",
+    "insert_callables_into_global",
+    "get_youtube_video_subtitle",
+    "get_youtube_video_details",
+    "interactive_shell",
 ]
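A minimal usage sketch of the expanded top-level API (only the names re-exported in the hunks above are real; the prompt text is illustrative, and an OpenAI API key plus the `langchain` extra are assumed):

```python
# Sketch only: exercises names newly re-exported from chatterer/__init__.py.
from chatterer import Chatterer, get_default_repl_tool, interactive_shell

chat = Chatterer.openai()  # assumes OPENAI_API_KEY is set in the environment
print(chat("In one sentence, what does a Python REPL tool let an LLM do?"))

# The new Rich-based chat loop (defined in the language_model.py hunks below)
# blocks on stdin, so it is left commented out here.
# interactive_shell(chatterer=chat, repl_tool=get_default_repl_tool())
```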
chatterer/language_model.py CHANGED
@@ -2,9 +2,12 @@ from typing import (
     TYPE_CHECKING,
     Any,
     AsyncIterator,
+    Callable,
+    Iterable,
     Iterator,
     Optional,
     Self,
+    Sequence,
     Type,
     TypeAlias,
     TypeVar,
@@ -18,15 +21,39 @@ from langchain_core.runnables.base import Runnable
 from langchain_core.runnables.config import RunnableConfig
 from pydantic import BaseModel, Field
 
-from .messages import AIMessage, BaseMessage, HumanMessage
+from .messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
+from .utils.code_agent import CodeExecutionResult, FunctionSignature, get_default_repl_tool
 
 if TYPE_CHECKING:
     from instructor import Partial
+    from langchain_experimental.tools.python.tool import PythonAstREPLTool
 
 PydanticModelT = TypeVar("PydanticModelT", bound=BaseModel)
 StructuredOutputType: TypeAlias = dict[object, object] | BaseModel
 
-DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION = "Just describe all the details you see in the image in few sentences."
+DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION = "Provide a detailed description of all visible elements in the image, summarizing key details in a few clear sentences."
+DEFAULT_CODE_GENERATION_PROMPT = (
+    "You are utilizing a Python code execution tool now.\n"
+    "Your goal is to generate Python code that solves the task efficiently and appends both the code and its output to your context memory.\n"
+    "Since your context window is highly limited, type `pass` if no code execution is needed.\n"
+    "\n"
+    "To optimize tool efficiency, follow these guidelines:\n"
+    "- Write concise, efficient code that directly serves the intended purpose.\n"
+    "- Avoid unnecessary operations (e.g., excessive loops, recursion, or heavy computations).\n"
+    "- Handle potential errors gracefully (e.g., using try-except blocks).\n"
+    "- Prevent excessive output by limiting print statements to essential information only (e.g., avoid printing large datasets).\n"
+    "\n"
+    "Return your response strictly in the following JSON format:\n"
+    '{\n "code": "<your_python_code_here>"\n}\n\n'
+)
+
+
+DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT = (
+    "Below functions are included in global scope and can be used in your code.\n"
+    "Do not try to redefine the function(s).\n"
+    "You don't have to force yourself to use these tools - use them only when you need to.\n"
+)
+DEFAULT_FUNCTION_REFERENCE_SEPARATOR = "\n---\n"  # Separator to distinguish different function references
 
 
 class Chatterer(BaseModel):
@@ -162,7 +189,7 @@ class Chatterer(BaseModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> PydanticModelT:
-        result: StructuredOutputType = with_structured_output(
+        result: StructuredOutputType = _with_structured_output(
             client=self.client,
             response_model=response_model,
             structured_output_kwargs=self.structured_output_kwargs,
@@ -180,7 +207,7 @@ class Chatterer(BaseModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> PydanticModelT:
-        result: StructuredOutputType = await with_structured_output(
+        result: StructuredOutputType = await _with_structured_output(
             client=self.client,
             response_model=response_model,
             structured_output_kwargs=self.structured_output_kwargs,
@@ -204,7 +231,7 @@ class Chatterer(BaseModel):
             raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
 
         partial_response_model = instructor.Partial[response_model]
-        for chunk in with_structured_output(
+        for chunk in _with_structured_output(
             client=self.client,
             response_model=partial_response_model,
             structured_output_kwargs=self.structured_output_kwargs,
@@ -225,7 +252,7 @@ class Chatterer(BaseModel):
             raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
 
         partial_response_model = instructor.Partial[response_model]
-        async for chunk in with_structured_output(
+        async for chunk in _with_structured_output(
             client=self.client,
             response_model=partial_response_model,
             structured_output_kwargs=self.structured_output_kwargs,
@@ -288,8 +315,77 @@ class Chatterer(BaseModel):
         except Exception:
             return None
 
+    def invoke_code_execution(
+        self,
+        messages: LanguageModelInput,
+        repl_tool: Optional["PythonAstREPLTool"] = None,
+        prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
+        function_signatures: Optional[FunctionSignature | Iterable[FunctionSignature]] = None,
+        function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
+        function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
+        config: Optional[RunnableConfig] = None,
+        stop: Optional[list[str]] = None,
+        **kwargs: Any,
+    ) -> CodeExecutionResult:
+        if not function_signatures:
+            function_signatures = []
+        elif isinstance(function_signatures, FunctionSignature):
+            function_signatures = [function_signatures]
+        messages = augment_prompt_for_toolcall(
+            function_signatures=function_signatures,
+            messages=messages,
+            prompt_for_code_invoke=prompt_for_code_invoke,
+            function_reference_prefix=function_reference_prefix,
+            function_reference_seperator=function_reference_seperator,
+        )
+        code_obj: PythonCodeToExecute = self.generate_pydantic(
+            response_model=PythonCodeToExecute, messages=messages, config=config, stop=stop, **kwargs
+        )
+        return CodeExecutionResult.from_code(
+            code=code_obj.code,
+            config=config,
+            repl_tool=repl_tool,
+            function_signatures=function_signatures,
+            **kwargs,
+        )
+
+    async def ainvoke_code_execution(
+        self,
+        messages: LanguageModelInput,
+        repl_tool: Optional["PythonAstREPLTool"] = None,
+        prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
+        additional_callables: Optional[Callable[..., object] | Sequence[Callable[..., object]]] = None,
+        function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
+        function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
+        config: Optional[RunnableConfig] = None,
+        stop: Optional[list[str]] = None,
+        **kwargs: Any,
+    ) -> CodeExecutionResult:
+        function_signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)
+        messages = augment_prompt_for_toolcall(
+            function_signatures=function_signatures,
+            messages=messages,
+            prompt_for_code_invoke=prompt_for_code_invoke,
+            function_reference_prefix=function_reference_prefix,
+            function_reference_seperator=function_reference_seperator,
+        )
+        code_obj: PythonCodeToExecute = await self.agenerate_pydantic(
+            response_model=PythonCodeToExecute, messages=messages, config=config, stop=stop, **kwargs
+        )
+        return await CodeExecutionResult.afrom_code(
+            code=code_obj.code,
+            config=config,
+            repl_tool=repl_tool,
+            function_signatures=function_signatures,
+            **kwargs,
+        )
+
+
+class PythonCodeToExecute(BaseModel):
+    code: str = Field(description="Python code to execute")
 
-def with_structured_output(
+
+def _with_structured_output(
     client: BaseChatModel,
     response_model: Type["PydanticModelT | Partial[PydanticModelT]"],
     structured_output_kwargs: dict[str, Any],
@@ -297,75 +393,188 @@ def with_structured_output(
     return client.with_structured_output(schema=response_model, **structured_output_kwargs)  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
 
 
-if __name__ == "__main__":
-    import asyncio
-
-    # Define a Pydantic model for testing
-    class Propositions(BaseModel):
-        proposition_topic: str
-        proposition_content: str
-
-    chatterer = Chatterer.openai()
-    prompt = "What is the meaning of life?"
-
-    # === Synchronous Tests ===
-
-    # generate
-    print("=== Synchronous generate ===")
-    result_sync = chatterer(prompt)
-    print("Result (generate):", result_sync)
-
-    # generate_stream
-    print("\n=== Synchronous generate_stream ===")
-    for i, chunk in enumerate(chatterer.generate_stream(prompt)):
-        print(f"Chunk {i}:", chunk)
-
-    # generate_pydantic
-    print("\n=== Synchronous generate_pydantic ===")
-    result_pydantic = chatterer(prompt, Propositions)
-    print("Result (generate_pydantic):", result_pydantic)
-
-    # generate_pydantic_stream
-    print("\n=== Synchronous generate_pydantic_stream ===")
-    for i, chunk in enumerate(chatterer.generate_pydantic_stream(Propositions, prompt)):
-        print(f"Pydantic Chunk {i}:", chunk)
-
-    # === Asynchronous Tests ===
-
-    # Async helper function to enumerate async iterator
-    async def async_enumerate(aiter: AsyncIterator[Any], start: int = 0) -> AsyncIterator[tuple[int, Any]]:
-        i = start
-        async for item in aiter:
-            yield i, item
-            i += 1
-
-    async def run_async_tests():
-        # 6. agenerate
-        print("\n=== Asynchronous agenerate ===")
-        result_async = await chatterer.agenerate(prompt)
-        print("Result (agenerate):", result_async)
-
-        # 7. agenerate_stream
-        print("\n=== Asynchronous agenerate_stream ===")
-        async for i, chunk in async_enumerate(chatterer.agenerate_stream(prompt)):
-            print(f"Async Chunk {i}:", chunk)
-
-        # 8. agenerate_pydantic
-        print("\n=== Asynchronous agenerate_pydantic ===")
-        try:
-            result_async_pydantic = await chatterer.agenerate_pydantic(Propositions, prompt)
-            print("Result (agenerate_pydantic):", result_async_pydantic)
-        except Exception as e:
-            print("Error in agenerate_pydantic:", e)
+ def _add_message_last(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
397
+ if isinstance(messages, str):
398
+ messages += f"\n{prompt_to_add}"
399
+ elif isinstance(messages, Sequence):
400
+ messages = list(messages)
401
+ messages.append(SystemMessage(content=prompt_to_add))
402
+ else:
403
+ messages = messages.to_messages()
404
+ messages.append(SystemMessage(content=prompt_to_add))
405
+ return messages
406
+
407
+
408
+ # def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
409
+ # if isinstance(messages, str):
410
+ # messages = f"{prompt_to_add}\n{messages}"
411
+ # elif isinstance(messages, Sequence):
412
+ # messages = list(messages)
413
+ # messages.insert(0, SystemMessage(content=prompt_to_add))
414
+ # else:
415
+ # messages = messages.to_messages()
416
+ # messages.insert(0, SystemMessage(content=prompt_to_add))
417
+ # return messages
418
+
419
+
420
+ def augment_prompt_for_toolcall(
421
+ function_signatures: Iterable[FunctionSignature],
422
+ messages: LanguageModelInput,
423
+ prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
424
+ function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
425
+ function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
426
+ ) -> LanguageModelInput:
427
+ if function_signatures:
428
+ messages = _add_message_last(
429
+ messages=messages,
430
+ prompt_to_add=FunctionSignature.as_prompt(
431
+ function_signatures, function_reference_prefix, function_reference_seperator
432
+ ),
433
+ )
434
+ if prompt_for_code_invoke:
435
+ messages = _add_message_last(messages=messages, prompt_to_add=prompt_for_code_invoke)
436
+ return messages
437
+
438
+
439
+ def interactive_shell(
440
+ chatterer: Chatterer = Chatterer.openai(),
441
+ system_instruction: BaseMessage | Iterable[BaseMessage] = ([
442
+ SystemMessage("You are an AI that can answer questions and execute Python code."),
443
+ ]),
444
+ repl_tool: Optional["PythonAstREPLTool"] = None,
445
+ prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
446
+ additional_callables: Optional[Callable[..., object] | Sequence[Callable[..., object]]] = None,
447
+ function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
448
+ function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
449
+ config: Optional[RunnableConfig] = None,
450
+ stop: Optional[list[str]] = None,
451
+ **kwargs: Any,
452
+ ) -> None:
453
+ # Define the CodeExecutionDecision class using Pydantic
454
+
455
+ from rich.console import Console
456
+ from rich.prompt import Prompt
457
+
458
+ class IsCodeExecutionNeeded(BaseModel):
459
+ is_code_execution_needed: bool = Field(
460
+ description="Whether Python tool calling is needed to answer user query."
461
+ )
360
462
 
361
- # 9. agenerate_pydantic_stream
362
- print("\n=== Asynchronous agenerate_pydantic_stream ===")
363
- try:
364
- i = 0
365
- async for chunk in chatterer.agenerate_pydantic_stream(Propositions, prompt):
366
- print(f"Async Pydantic Chunk {i}:", chunk)
367
- i += 1
368
- except Exception as e:
369
- print("Error in agenerate_pydantic_stream:", e)
370
-
371
- asyncio.run(run_async_tests())
463
+ class IsFurtherCodeExecutionNeeded(BaseModel):
464
+ review_on_code_execution: str = Field(description="Review on the code execution.")
465
+ next_action: str = Field(description="Next action to take.")
466
+ is_further_code_execution_needed: bool = Field(
467
+ description="Whether further Python tool calling is needed to answer user query."
468
+ )
469
+
470
+ # Get default REPL tool if not provided.
471
+ # This tool namespace is persistent across multiple code executions.
472
+ if repl_tool is None:
473
+ repl_tool = get_default_repl_tool()
474
+
475
+ function_signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)
476
+
477
+ # Initialize Rich console
478
+ console = Console()
479
+
480
+ # Initialize conversation context
481
+ context: list[BaseMessage] = []
482
+ if system_instruction:
483
+ if isinstance(system_instruction, BaseMessage):
484
+ context.append(system_instruction)
485
+ else:
486
+ context.extend(system_instruction)
487
+
488
+ # Display welcome message
489
+ console.print("[bold blue]Welcome to the Interactive Chatterer Shell![/bold blue]")
490
+ console.print("Type 'quit' or 'exit' to end the conversation.")
491
+
492
+ while True:
493
+ # Get user input
494
+ user_input = Prompt.ask("[bold green]You[/bold green]")
495
+ if user_input.lower() in ["quit", "exit"]:
496
+ console.print("[bold blue]Goodbye![/bold blue]")
497
+ break
498
+
499
+ # Add user message to context
500
+ context.append(HumanMessage(content=user_input))
501
+
502
+ # Determine if code execution is needed
503
+ decision = chatterer.generate_pydantic(
504
+ response_model=IsCodeExecutionNeeded, # Use response_model instead of pydantic_model
505
+ messages=augment_prompt_for_toolcall(
506
+ function_signatures=function_signatures,
507
+ messages=context,
508
+ prompt_for_code_invoke=prompt_for_code_invoke,
509
+ function_reference_prefix=function_reference_prefix,
510
+ function_reference_seperator=function_reference_seperator,
511
+ ),
512
+ )
513
+
514
+ if decision.is_code_execution_needed:
515
+ # Execute code if needed
516
+ code_result = chatterer.invoke_code_execution(
517
+ messages=context,
518
+ repl_tool=repl_tool,
519
+ prompt_for_code_invoke=prompt_for_code_invoke,
520
+ function_signatures=function_signatures,
521
+ function_reference_prefix=function_reference_prefix,
522
+ function_reference_seperator=function_reference_seperator,
523
+ config=config,
524
+ stop=stop,
525
+ **kwargs,
526
+ )
527
+
528
+ if code_result.code.strip() == "pass":
529
+ tool_use_message = None
530
+ else:
531
+ code_session_messages: list[BaseMessage] = []
532
+ while True:
533
+ code_execution_message = SystemMessage(
534
+ content=f"Executed code:\n```python\n{code_result.code}\n```\nOutput:\n{code_result.output}"
535
+ )
536
+ code_session_messages.append(code_execution_message)
537
+ console.print("[bold yellow]Executed code:[/bold yellow]")
538
+ console.print(f"[code]{code_result.code}[/code]")
539
+ console.print("[bold yellow]Output:[/bold yellow]")
540
+ console.print(code_result.output)
541
+
542
+ decision = chatterer.generate_pydantic(
543
+ response_model=IsFurtherCodeExecutionNeeded, # Use response_model instead of pydantic_model
544
+ messages=augment_prompt_for_toolcall(
545
+ function_signatures=function_signatures,
546
+ messages=context + code_session_messages,
547
+ prompt_for_code_invoke=prompt_for_code_invoke,
548
+ function_reference_prefix=function_reference_prefix,
549
+ function_reference_seperator=function_reference_seperator,
550
+ ),
551
+ )
552
+ review_on_code_execution = decision.review_on_code_execution
553
+ next_action = decision.next_action
554
+ console.print("[bold blue]AI:[/bold blue]")
555
+ console.print(f"-[bold yellow]Review on code execution:[/bold yellow] {review_on_code_execution}")
556
+ console.print(f"-[bold yellow]Next Action:[/bold yellow] {next_action}")
557
+ code_session_messages.append(
558
+ AIMessage(
559
+ content=f"- Review upon code execution: {decision.review_on_code_execution}\n- Next Action: {decision.next_action}"
560
+ )
561
+ )
562
+ if not decision.is_further_code_execution_needed:
563
+ tool_use_message = code_execution_message
564
+ break
565
+ else:
566
+ # No code execution required
567
+ tool_use_message = None
568
+
569
+ # Add system message to context
570
+ if tool_use_message:
571
+ context.append(tool_use_message)
572
+
573
+ # Generate and display chatbot response
574
+ response = chatterer.generate(messages=context) # Use generate instead of generate_response
575
+ context.append(AIMessage(content=response))
576
+ console.print(f"[bold blue]AI:[/bold blue] {response}")
577
+
578
+
579
+ if __name__ == "__main__":
580
+ interactive_shell()
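A short sketch of the new code-execution round trip added above (hedged: it assumes an OpenAI API key and the `langchain` extra, which pulls in langchain-experimental for `PythonAstREPLTool`; the prompt text is illustrative):

```python
# Sketch only: generate Python code with the model, run it in a persistent REPL,
# and read back both the code and its captured output.
from chatterer import Chatterer, get_default_repl_tool

chat = Chatterer.openai()
repl = get_default_repl_tool()  # one REPL instance, so state persists across calls

result = chat.invoke_code_execution(
    messages="What is the 20th Fibonacci number? Compute it, don't recall it.",
    repl_tool=repl,
)
print(result.code)    # the generated Python snippet
print(result.output)  # whatever PythonAstREPLTool returned for that snippet
```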
chatterer/messages.py CHANGED
@@ -1,8 +1,9 @@
-from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
+from langchain_core.messages import AIMessage, BaseMessage, FunctionMessage, HumanMessage, SystemMessage
 
 __all__ = [
     "AIMessage",
     "BaseMessage",
     "HumanMessage",
     "SystemMessage",
+    "FunctionMessage",
 ]
chatterer/tools/__init__.py CHANGED
@@ -6,6 +6,14 @@ from .convert_to_text import (
     pdf_to_text,
     pyscripts_to_snippets,
 )
+from .youtube import get_youtube_video_subtitle, get_youtube_video_details
+
+
+def init_webpage_to_markdown():
+    from . import webpage_to_markdown
+
+    return webpage_to_markdown
+
 
 __all__ = [
     "html_to_markdown",
@@ -14,4 +22,7 @@ __all__ = [
     "get_default_html_to_markdown_options",
     "pyscripts_to_snippets",
     "citation_chunker",
+    "init_webpage_to_markdown",
+    "get_youtube_video_subtitle",
+    "get_youtube_video_details",
 ]
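A minimal sketch of the new lazy loader added above: the Playwright-backed `webpage_to_markdown` module is only imported when explicitly requested, so importing `chatterer.tools` stays cheap (assumes the module's optional dependencies, e.g. Playwright, are installed before actually using it):

```python
# Sketch only: defer the heavy import until the webpage-to-markdown tooling is needed.
from chatterer import init_webpage_to_markdown

webpage_to_markdown = init_webpage_to_markdown()
print(webpage_to_markdown.__name__)  # the lazily imported submodule
```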
chatterer/tools/youtube.py ADDED
@@ -0,0 +1,132 @@
+import json
+import unicodedata
+import urllib.parse
+from dataclasses import dataclass
+from typing import Any, Optional, Self, cast
+
+import requests
+
+
+def get_youtube_video_details(
+    query: str,
+) -> list[dict[str, Optional[str]]]:
+    """Search for video metadata on YouTube using the given query. Returns a list of dictionaries containing `video_id`, `title`, `channel`, `duration`, `views`, `publish_time`, and `long_desc`."""
+    return [
+        {
+            "video_id": video_id,
+            "title": video.title,
+            "channel": video.channel,
+            "duration": video.duration,
+            "views": video.views,
+            "publish_time": video.publish_time,
+            "long_desc": video.long_desc,
+        }
+        for video in YoutubeSearchResult.from_query(base_url="https://youtube.com", query=query, max_results=10)
+        if (video_id := _get_video_id(video.url_suffix))
+    ]
+
+
+def get_youtube_video_subtitle(video_id: str) -> str:
+    """Get the transcript of a YouTube video using the given video ID."""
+
+    from youtube_transcript_api._api import YouTubeTranscriptApi
+
+    get_transcript = YouTubeTranscriptApi.get_transcript  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    list_transcripts = YouTubeTranscriptApi.list_transcripts  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+
+    result: str = ""
+    buffer_timestamp: str = "0s"
+    buffer_texts: list[str] = []
+    for entry in get_transcript(video_id, languages=(next(iter(list_transcripts(video_id))).language_code,)):  # pyright: ignore[reportUnknownVariableType]
+        entry = cast(dict[object, object], entry)
+        text: str = str(entry.get("text", "")).strip().replace("\n", " ")
+        if not text:
+            continue
+        if len(buffer_texts) >= 10 or _is_special_char(text) or (buffer_texts and _is_special_char(buffer_texts[-1])):
+            result += f"[{buffer_timestamp}] {'. '.join(buffer_texts)}\n"
+            start = entry.get("start", 0)
+            if start:
+                buffer_timestamp = f"{start:.0f}s"
+            buffer_texts = [text]
+        else:
+            buffer_texts.append(text)
+
+    if buffer_texts:
+        result += f"[{buffer_timestamp}] {' '.join(buffer_texts)}"
+    return result
+
+
+def _get_video_id(suffix: str) -> str:
+    return next(iter(urllib.parse.parse_qs(urllib.parse.urlparse(suffix).query)["v"]), "")
+
+
+def _is_special_char(text: str) -> bool:
+    if not text:
+        return False
+    return not unicodedata.category(text[0]).startswith("L")
+
+
+@dataclass
+class YoutubeSearchResult:
+    url_suffix: str
+    id: Optional[str]
+    thumbnails: list[str]
+    title: Optional[str]
+    long_desc: Optional[str]
+    channel: Optional[str]
+    duration: Optional[str]
+    views: Optional[str]
+    publish_time: Optional[str]
+
+    @classmethod
+    def from_query(cls, base_url: str, query: str, max_results: int) -> list[Self]:
+        url: str = f"{base_url}/results?search_query={urllib.parse.quote_plus(query)}"
+        response: str = requests.get(url).text
+        while "ytInitialData" not in response:
+            response = requests.get(url).text
+        results: list[Self] = cls.parse_html(response)
+        return results[:max_results]
+
+    @classmethod
+    def parse_html(cls, html: str) -> list[Self]:
+        results: list[Self] = []
+        start: int = html.index("ytInitialData") + len("ytInitialData") + 3
+        end: int = html.index("};", start) + 1
+        data: Any = json.loads(html[start:end])
+        for contents in data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"][
+            "contents"
+        ]:
+            for video in contents["itemSectionRenderer"]["contents"]:
+                if "videoRenderer" in video.keys():
+                    video_data = video.get("videoRenderer", {})
+                    suffix = (
+                        video_data.get("navigationEndpoint", {})
+                        .get("commandMetadata", {})
+                        .get("webCommandMetadata", {})
+                        .get("url", None)
+                    )
+                    if not suffix:
+                        continue
+                    res = cls(
+                        id=video_data.get("videoId", None),
+                        thumbnails=[
+                            thumb.get("url", None) for thumb in video_data.get("thumbnail", {}).get("thumbnails", [{}])
+                        ],
+                        title=video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None),
+                        long_desc=video_data.get("descriptionSnippet", {}).get("runs", [{}])[0].get("text", None),
+                        channel=video_data.get("longBylineText", {}).get("runs", [[{}]])[0].get("text", None),
+                        duration=video_data.get("lengthText", {}).get("simpleText", 0),
+                        views=video_data.get("viewCountText", {}).get("simpleText", 0),
+                        publish_time=video_data.get("publishedTimeText", {}).get("simpleText", 0),
+                        url_suffix=suffix,
+                    )
+                    results.append(res)
+
+            if results:
+                break
+        return results
+
+
+if __name__ == "__main__":
+    print(get_youtube_video_details("BTS"))
+    # print(get_youtube_transcript("y7jrpS8GHxs"))
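A usage sketch for the two public helpers defined in this new module (hedged: it needs network access and the `conversion` extra's youtube-transcript-api package; the search query is illustrative and no real video id is assumed):

```python
# Sketch only: search YouTube, then optionally feed one id back for a transcript.
from chatterer import get_youtube_video_details, get_youtube_video_subtitle

for video in get_youtube_video_details("python asyncio tutorial")[:3]:
    print(video["video_id"], "-", video["title"])

# Pass one of the ids printed above to get a timestamped transcript string.
# transcript = get_youtube_video_subtitle("<video_id from the search above>")
```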
chatterer/utils/__init__.py ADDED
@@ -0,0 +1,15 @@
+from .code_agent import (
+    CodeExecutionResult,
+    FunctionSignature,
+    get_default_repl_tool,
+    insert_callables_into_global,
+)
+from .image import Base64Image
+
+__all__ = [
+    "Base64Image",
+    "FunctionSignature",
+    "CodeExecutionResult",
+    "get_default_repl_tool",
+    "insert_callables_into_global",
+]
chatterer/utils/code_agent.py ADDED
@@ -0,0 +1,138 @@
+import inspect
+import textwrap
+from typing import (
+    TYPE_CHECKING,
+    Callable,
+    Iterable,
+    NamedTuple,
+    Optional,
+    Self,
+)
+
+from langchain_core.runnables.config import RunnableConfig
+
+if TYPE_CHECKING:
+    from langchain_experimental.tools import PythonAstREPLTool
+
+
+class FunctionSignature(NamedTuple):
+    name: str
+    callable: Callable[..., object]
+    signature: str
+
+    @classmethod
+    def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
+        if callables is None:
+            return []
+        if callable(callables):
+            return [cls._from_callable(callables)]
+        return [cls._from_callable(callable) for callable in callables]
+
+    @classmethod
+    def _from_callable(cls, callable: Callable[..., object]) -> Self:
+        """
+        Get the name and signature of a function as a string.
+        """
+        # Determine if the function is async
+        is_async_func = inspect.iscoroutinefunction(callable)
+        function_def = "async def" if is_async_func else "def"
+
+        # Determine the function name based on the type of callable
+        if inspect.isfunction(callable):
+            # For regular Python functions, use __code__.co_name
+            function_name = callable.__code__.co_name
+        elif hasattr(callable, "name"):
+            # For StructuredTool or similar objects with a 'name' attribute
+            function_name = callable.name  # type: ignore
+        elif hasattr(callable, "__name__"):
+            # For other callables with a __name__ attribute
+            function_name = callable.__name__
+        else:
+            # Fallback to the class name if no name is found
+            function_name = type(callable).__name__
+
+        # Build the signature string
+        signature = f"{function_def} {function_name}{inspect.signature(callable)}:"
+        docstring = inspect.getdoc(callable)
+        if docstring:
+            docstring = f'"""{docstring.strip()}"""'
+            return cls(
+                name=function_name, callable=callable, signature=f"{signature}\n{textwrap.indent(docstring, ' ')}"
+            )
+        else:
+            return cls(name=function_name, callable=callable, signature=signature)
+
+    @classmethod
+    def as_prompt(
+        cls,
+        function_signatures: Iterable[Self],
+        prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
+        sep: str = "\n---\n",
+    ) -> str:
+        """
+        Generate a prompt string from a list of callables.
+        """
+        body: str = sep.join(fsig.signature for fsig in function_signatures)
+        if prefix:
+            return f"{prefix}{body}"
+        return body
+
+
+class CodeExecutionResult(NamedTuple):
+    code: str
+    output: str
+
+    @classmethod
+    def from_code(
+        cls,
+        code: str,
+        repl_tool: Optional["PythonAstREPLTool"] = None,
+        config: Optional[RunnableConfig] = None,
+        function_signatures: Optional[Iterable[FunctionSignature]] = None,
+        **kwargs: object,
+    ) -> Self:
+        """
+        Execute code using the Python Code Execution Language Model.
+        """
+        if repl_tool is None:
+            repl_tool = get_default_repl_tool()
+        if function_signatures:
+            insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
+        output = str(repl_tool.invoke(code, config=config, **kwargs))  # pyright: ignore[reportUnknownMemberType]
+        return cls(code=code, output=output)
+
+    @classmethod
+    async def afrom_code(
+        cls,
+        code: str,
+        repl_tool: Optional["PythonAstREPLTool"] = None,
+        config: Optional[RunnableConfig] = None,
+        function_signatures: Optional[Iterable[FunctionSignature]] = None,
+        **kwargs: object,
+    ) -> Self:
+        """
+        Execute code using the Python Code Execution Language Model asynchronously.
+        """
+        if repl_tool is None:
+            repl_tool = get_default_repl_tool()
+        if function_signatures:
+            insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
+        output = str(await repl_tool.ainvoke(code, config=config, **kwargs))  # pyright: ignore[reportUnknownMemberType]
+        return cls(code=code, output=output)
+
+
+def get_default_repl_tool() -> "PythonAstREPLTool":
+    from langchain_experimental.tools import PythonAstREPLTool
+
+    return PythonAstREPLTool()
+
+
+def insert_callables_into_global(
+    function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
+) -> None:
+    """Insert callables into the REPL tool's globals."""
+    repl_globals: Optional[dict[str, object]] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType]
+    if repl_globals is None:
+        repl_tool.globals = {fsig.name: fsig.callable for fsig in function_signatures}
+    else:
+        repl_globals.update({fsig.name: fsig.callable for fsig in function_signatures})
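A sketch of how the pieces in this new module compose, with no LLM involved at this level (hedged: `add_numbers` is a made-up example callable, and langchain-experimental must be installed for the default REPL tool):

```python
# Sketch only: expose one extra callable to the REPL, render its signature into
# a prompt block, then run a hand-written snippet through CodeExecutionResult.
from chatterer import CodeExecutionResult, FunctionSignature, get_default_repl_tool


def add_numbers(a: int, b: int) -> int:
    """Return the sum of two integers."""
    return a + b


signatures = FunctionSignature.from_callable(add_numbers)
print(FunctionSignature.as_prompt(signatures))  # prompt text listing `def add_numbers(...)`

repl = get_default_repl_tool()
result = CodeExecutionResult.from_code(
    code="print(add_numbers(2, 3))",
    repl_tool=repl,
    function_signatures=signatures,  # injects add_numbers into the REPL globals
)
print(result.output)  # expected to show 5
```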
chatterer-0.1.8.dist-info/METADATA → chatterer-0.1.10.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatterer
-Version: 0.1.8
+Version: 0.1.10
 Summary: The highest-level interface for various LLM APIs.
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
@@ -18,13 +18,17 @@ Requires-Dist: pillow>=11.1.0; extra == "conversion"
 Requires-Dist: mistune>=3.1.2; extra == "conversion"
 Requires-Dist: markitdown>=0.0.2; extra == "conversion"
 Requires-Dist: pymupdf>=1.25.4; extra == "conversion"
+Requires-Dist: youtube-transcript-api>=1.0.2; extra == "conversion"
+Provides-Extra: langchain
+Requires-Dist: chatterer[langchain-providers]; extra == "langchain"
+Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
 Provides-Extra: langchain-providers
 Requires-Dist: langchain-openai>=0.3.7; extra == "langchain-providers"
 Requires-Dist: langchain-anthropic>=0.3.8; extra == "langchain-providers"
 Requires-Dist: langchain-google-genai>=2.0.10; extra == "langchain-providers"
 Requires-Dist: langchain-ollama>=0.2.3; extra == "langchain-providers"
 Provides-Extra: all
-Requires-Dist: chatterer[langchain-providers]; extra == "all"
+Requires-Dist: chatterer[langchain]; extra == "all"
 Requires-Dist: chatterer[conversion]; extra == "all"
 Requires-Dist: chatterer[dev]; extra == "all"
 
chatterer-0.1.8.dist-info/RECORD → chatterer-0.1.10.dist-info/RECORD CHANGED
@@ -1,12 +1,13 @@
-chatterer/__init__.py,sha256=kl8VWiDJIt5IQjaBpQu13n0GrzP3qzaNXyA68B1xHTE,802
-chatterer/language_model.py,sha256=S8x2IbzZBi1mAKSKrGuoB4-gfKBz73RCNXt_H-fiDzc,13826
-chatterer/messages.py,sha256=-NyOIK7wJI1uVD8qaJPeLA0LqirFEsZ1mOYoO1F2wLc,188
+chatterer/__init__.py,sha256=BPgCQ6VWGBXSh8xJr_0bpM0hcOOUz0KoxcKxOd9GYyI,1388
+chatterer/language_model.py,sha256=qnVC5_W4IYM0y0o1PTYMGXUlblRv5fsRk0zIiL_vT3Q,24491
+chatterer/messages.py,sha256=OtbZ3two0LUQ4PXES97FDIBUSO3IcMHdFV1VFkDL2mI,229
 chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
 chatterer/strategies/atom_of_thoughts.py,sha256=CygOCLu5vLk-fzY9O-iE3qLShfjD7iY40ks9jH4ULBM,40872
 chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
-chatterer/tools/__init__.py,sha256=yA4RcHIAO33xsmWXQTmtSm9bk1p80yJKSadtMa3X-aY,415
+chatterer/tools/__init__.py,sha256=hmWIuLJWotGQodL__i4LLbHdXe7Nl5uKHqNke9tHMro,705
 chatterer/tools/convert_to_text.py,sha256=kBqxCJ0IoiAw2eiPYqep_SPZm-TtYKF7mdACLsWQUuI,15915
+chatterer/tools/youtube.py,sha256=5hwASZWA92d_7Y5RqlCK2tULRBQx8bGnE_0NCnvaKi0,5499
 chatterer/tools/citation_chunking/__init__.py,sha256=gG7Fnkkp28UpcWMbfMY_4gqzZSZ8QzlhalHBoeoq7K0,82
 chatterer/tools/citation_chunking/chunks.py,sha256=50Dpa43RaYftlNox8tM1qI8htZ3_AJ9Uyyn02WsmxYk,2173
 chatterer/tools/citation_chunking/citation_chunker.py,sha256=yx5O9pUkowlNcFyyNf7f3sbq7-CV8AXOzFnviDldPR8,4894
@@ -17,8 +18,10 @@ chatterer/tools/citation_chunking/utils.py,sha256=M4pH2-UIE1VLzQLXDqjEe4L3Xcy0e0
 chatterer/tools/webpage_to_markdown/__init__.py,sha256=bHH4qfnXyw8Zz-yBPLaTezF1sh9njvNBJmhBVtcpjsA,123
 chatterer/tools/webpage_to_markdown/playwright_bot.py,sha256=yP0KixYZNQ4Kn_ZCFDI3mVyBD_DpUGfqgklpaGJUTCU,27496
 chatterer/tools/webpage_to_markdown/utils.py,sha256=ZLUU94imYciEdynD2K7Dmcsbt8BVQTaOP56Ba6DAFvk,12593
+chatterer/utils/__init__.py,sha256=8nzpFJKU_wSRPH6LBP6HRBotPMrSl_VO9UlmFprTrK0,334
+chatterer/utils/code_agent.py,sha256=UaWdeGzJMPzRSFy9yrxuveBJsvOPSa0te6OuE18bees,5143
 chatterer/utils/image.py,sha256=F3_D1677UDFlgp-UQBS_ChkNODzf_VOfjYNSUi02MaI,10852
-chatterer-0.1.8.dist-info/METADATA,sha256=01CGNp0oae5VdHM5gzqPKYFtlSqufE0h5XFMdn2E_6c,4234
-chatterer-0.1.8.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
-chatterer-0.1.8.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
-chatterer-0.1.8.dist-info/RECORD,,
+chatterer-0.1.10.dist-info/METADATA,sha256=qPx7b41yUvBG0XFH4ra89LIyOSUZUo_8gZ-adVkTKME,4458
+chatterer-0.1.10.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
+chatterer-0.1.10.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
+chatterer-0.1.10.dist-info/RECORD,,
+ chatterer-0.1.10.dist-info/RECORD,,