chatterer 0.1.9__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to their registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in the public registry.
- chatterer/__init__.py +6 -1
- chatterer/language_model.py +174 -85
- chatterer/tools/__init__.py +3 -0
- chatterer/tools/youtube.py +146 -0
- chatterer/utils/code_agent.py +13 -9
- chatterer/utils/image.py +1 -1
- {chatterer-0.1.9.dist-info → chatterer-0.1.11.dist-info}/METADATA +2 -1
- {chatterer-0.1.9.dist-info → chatterer-0.1.11.dist-info}/RECORD +10 -9
- {chatterer-0.1.9.dist-info → chatterer-0.1.11.dist-info}/WHEEL +1 -1
- {chatterer-0.1.9.dist-info → chatterer-0.1.11.dist-info}/top_level.txt +0 -0
chatterer/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from .language_model import Chatterer
+from .language_model import Chatterer, interactive_shell
 from .messages import (
     AIMessage,
     BaseMessage,
@@ -16,10 +16,12 @@ from .tools import (
     anything_to_markdown,
     citation_chunker,
     get_default_html_to_markdown_options,
+    get_youtube_video_subtitle,
     html_to_markdown,
     init_webpage_to_markdown,
     pdf_to_text,
     pyscripts_to_snippets,
+    get_youtube_video_details,
 )
 from .utils import (
     Base64Image,
@@ -52,4 +54,7 @@ __all__ = [
     "CodeExecutionResult",
     "get_default_repl_tool",
     "insert_callables_into_global",
+    "get_youtube_video_subtitle",
+    "get_youtube_video_details",
+    "interactive_shell",
 ]
chatterer/language_model.py
CHANGED
@@ -22,7 +22,7 @@ from langchain_core.runnables.config import RunnableConfig
 from pydantic import BaseModel, Field
 
 from .messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
-from .utils.code_agent import CodeExecutionResult, FunctionSignature
+from .utils.code_agent import CodeExecutionResult, FunctionSignature, get_default_repl_tool
 
 if TYPE_CHECKING:
     from instructor import Partial
@@ -35,13 +35,11 @@ DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION = "Provide a detailed description of all v
 DEFAULT_CODE_GENERATION_PROMPT = (
     "You are utilizing a Python code execution tool now.\n"
     "Your goal is to generate Python code that solves the task efficiently and appends both the code and its output to your context memory.\n"
-    "Since your context window is highly limited, type `pass` if no code execution is needed.\n"
     "\n"
     "To optimize tool efficiency, follow these guidelines:\n"
     "- Write concise, efficient code that directly serves the intended purpose.\n"
     "- Avoid unnecessary operations (e.g., excessive loops, recursion, or heavy computations).\n"
     "- Handle potential errors gracefully (e.g., using try-except blocks).\n"
-    "- Prevent excessive output by limiting print statements to essential information only (e.g., avoid printing large datasets).\n"
     "\n"
     "Return your response strictly in the following JSON format:\n"
     '{\n "code": "<your_python_code_here>"\n}\n\n'
@@ -189,7 +187,7 @@ class Chatterer(BaseModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> PydanticModelT:
-        result: StructuredOutputType = with_structured_output(
+        result: StructuredOutputType = _with_structured_output(
             client=self.client,
             response_model=response_model,
             structured_output_kwargs=self.structured_output_kwargs,
@@ -207,7 +205,7 @@ class Chatterer(BaseModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> PydanticModelT:
-        result: StructuredOutputType = await with_structured_output(
+        result: StructuredOutputType = await _with_structured_output(
             client=self.client,
             response_model=response_model,
             structured_output_kwargs=self.structured_output_kwargs,
@@ -231,7 +229,7 @@ class Chatterer(BaseModel):
             raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
 
         partial_response_model = instructor.Partial[response_model]
-        for chunk in with_structured_output(
+        for chunk in _with_structured_output(
             client=self.client,
             response_model=partial_response_model,
             structured_output_kwargs=self.structured_output_kwargs,
@@ -252,7 +250,7 @@ class Chatterer(BaseModel):
             raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
 
         partial_response_model = instructor.Partial[response_model]
-        async for chunk in with_structured_output(
+        async for chunk in _with_structured_output(
             client=self.client,
             response_model=partial_response_model,
             structured_output_kwargs=self.structured_output_kwargs,
@@ -320,26 +318,24 @@ class Chatterer(BaseModel):
         messages: LanguageModelInput,
         repl_tool: Optional["PythonAstREPLTool"] = None,
         prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
-
+        function_signatures: Optional[FunctionSignature | Iterable[FunctionSignature]] = None,
         function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
         function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
         config: Optional[RunnableConfig] = None,
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> CodeExecutionResult:
-        function_signatures:
-
-
-
-
-
-
-
-
-
-        if prompt_for_code_invoke:
-            messages = _add_message_last(messages=messages, prompt_to_add=prompt_for_code_invoke)
+        if not function_signatures:
+            function_signatures = []
+        elif isinstance(function_signatures, FunctionSignature):
+            function_signatures = [function_signatures]
+        messages = augment_prompt_for_toolcall(
+            function_signatures=function_signatures,
+            messages=messages,
+            prompt_for_code_invoke=prompt_for_code_invoke,
+            function_reference_prefix=function_reference_prefix,
+            function_reference_seperator=function_reference_seperator,
+        )
         code_obj: PythonCodeToExecute = self.generate_pydantic(
             response_model=PythonCodeToExecute, messages=messages, config=config, stop=stop, **kwargs
         )
@@ -363,19 +359,14 @@ class Chatterer(BaseModel):
         stop: Optional[list[str]] = None,
         **kwargs: Any,
     ) -> CodeExecutionResult:
-        function_signatures:
-
-
-
-
-
-
-
-                function_signatures, function_reference_prefix, function_reference_seperator
-            ),
-        )
-        if prompt_for_code_invoke:
-            messages = _add_message_last(messages=messages, prompt_to_add=prompt_for_code_invoke)
+        function_signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)
+        messages = augment_prompt_for_toolcall(
+            function_signatures=function_signatures,
+            messages=messages,
+            prompt_for_code_invoke=prompt_for_code_invoke,
+            function_reference_prefix=function_reference_prefix,
+            function_reference_seperator=function_reference_seperator,
+        )
         code_obj: PythonCodeToExecute = await self.agenerate_pydantic(
             response_model=PythonCodeToExecute, messages=messages, config=config, stop=stop, **kwargs
         )
@@ -392,7 +383,7 @@ class PythonCodeToExecute(BaseModel):
     code: str = Field(description="Python code to execute")
 
 
-def with_structured_output(
+def _with_structured_output(
     client: BaseChatModel,
     response_model: Type["PydanticModelT | Partial[PydanticModelT]"],
     structured_output_kwargs: dict[str, Any],
@@ -400,93 +391,191 @@ def with_structured_output(
     return client.with_structured_output(schema=response_model, **structured_output_kwargs)  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
 
 
-def _add_message_last(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
+# def _add_message_last(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
+#     if isinstance(messages, str):
+#         messages += f"\n{prompt_to_add}"
+#     elif isinstance(messages, Sequence):
+#         messages = list(messages)
+#         messages.append(SystemMessage(content=prompt_to_add))
+#     else:
+#         messages = messages.to_messages()
+#         messages.append(SystemMessage(content=prompt_to_add))
+#     return messages
+
+
+def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
     if isinstance(messages, str):
-        messages += f"\n{prompt_to_add}"
+        messages = f"{prompt_to_add}\n{messages}"
     elif isinstance(messages, Sequence):
         messages = list(messages)
-        messages.append(SystemMessage(content=prompt_to_add))
+        messages.insert(0, SystemMessage(content=prompt_to_add))
     else:
         messages = messages.to_messages()
-        messages.append(SystemMessage(content=prompt_to_add))
+        messages.insert(0, SystemMessage(content=prompt_to_add))
     return messages
 
 
-
-
-
-
-
-
-
-
-
-
+def augment_prompt_for_toolcall(
+    function_signatures: Iterable[FunctionSignature],
+    messages: LanguageModelInput,
+    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
+    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
+    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
+) -> LanguageModelInput:
+    if function_signatures:
+        messages = _add_message_first(
+            messages=messages,
+            prompt_to_add=FunctionSignature.as_prompt(
+                function_signatures, function_reference_prefix, function_reference_seperator
+            ),
+        )
+    if prompt_for_code_invoke:
+        messages = _add_message_first(messages=messages, prompt_to_add=prompt_for_code_invoke)
+    return messages
 
-def chatbot_example(chatterer: Chatterer = Chatterer.openai()) -> None:
-    # Define the CodeExecutionDecision class using Pydantic
 
+def interactive_shell(
+    chatterer: Chatterer = Chatterer.openai(),
+    system_instruction: BaseMessage | Iterable[BaseMessage] = ([
+        SystemMessage("You are an AI that can answer questions and execute Python code."),
+    ]),
+    repl_tool: Optional["PythonAstREPLTool"] = None,
+    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
+    additional_callables: Optional[Callable[..., object] | Sequence[Callable[..., object]]] = None,
+    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
+    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
+    config: Optional[RunnableConfig] = None,
+    stop: Optional[list[str]] = None,
+    **kwargs: Any,
+) -> None:
     from rich.console import Console
     from rich.prompt import Prompt
 
-
+    # Model that decides whether code execution is needed
+    class IsCodeExecutionNeeded(BaseModel):
         is_code_execution_needed: bool = Field(
             description="Whether Python tool calling is needed to answer user query."
         )
 
-    #
-
+    # Model that decides whether further code execution is needed
+    class IsFurtherCodeExecutionNeeded(BaseModel):
+        review_on_code_execution: str = Field(description="Review on the code execution.")
+        next_action: str = Field(description="Next action to take.")
+        is_further_code_execution_needed: bool = Field(
+            description="Whether further Python tool calling is needed to answer user query."
+        )
+
+    # Initialize the REPL tool
+    if repl_tool is None:
+        repl_tool = get_default_repl_tool()
 
-
-
+    function_signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)
+    console = Console()
+    context: list[BaseMessage] = []
+    if system_instruction:
+        if isinstance(system_instruction, BaseMessage):
+            context.append(system_instruction)
+        else:
+            context.extend(system_instruction)
 
-    #
-    console.print("[bold blue]Welcome to the
+    # Welcome message
+    console.print("[bold blue]Welcome to the Interactive Chatterer Shell![/bold blue]")
     console.print("Type 'quit' or 'exit' to end the conversation.")
 
     while True:
-        #
+        # Get user input
         user_input = Prompt.ask("[bold green]You[/bold green]")
         if user_input.lower() in ["quit", "exit"]:
            console.print("[bold blue]Goodbye![/bold blue]")
            break
 
-        # Add user message to context
         context.append(HumanMessage(content=user_input))
 
-        #
+        # Decide whether code execution is needed
         decision = chatterer.generate_pydantic(
-            response_model=
-            messages=
+            response_model=IsCodeExecutionNeeded,
+            messages=augment_prompt_for_toolcall(
+                function_signatures=function_signatures,
+                messages=context,
+                prompt_for_code_invoke=prompt_for_code_invoke,
+                function_reference_prefix=function_reference_prefix,
+                function_reference_seperator=function_reference_seperator,
+            ),
        )
 
+        # Handle code execution
        if decision.is_code_execution_needed:
-
-
+            code_result = chatterer.invoke_code_execution(
+                messages=context,
+                repl_tool=repl_tool,
+                prompt_for_code_invoke=prompt_for_code_invoke,
+                function_signatures=function_signatures,
+                function_reference_prefix=function_reference_prefix,
+                function_reference_seperator=function_reference_seperator,
+                config=config,
+                stop=stop,
+                **kwargs,
+            )
+
            if code_result.code.strip() == "pass":
-
+                tool_use_message = None
            else:
-
-
-
-
-
-
-
+                code_session_messages: list[BaseMessage] = []
+                while True:
+                    code_execution_message = AIMessage(
+                        content=f"Executed code:\n```python\n{code_result.code}\n```\nOutput:\n{code_result.output}".strip()
+                    )
+                    code_session_messages.append(code_execution_message)
+                    console.print("[bold yellow]Executed code:[/bold yellow]")
+                    console.print(f"[code]{code_result.code}[/code]")
+                    console.print("[bold yellow]Output:[/bold yellow]")
+                    console.print(code_result.output)
+
+                    decision = chatterer.generate_pydantic(
+                        response_model=IsFurtherCodeExecutionNeeded,
+                        messages=augment_prompt_for_toolcall(
+                            function_signatures=function_signatures,
+                            messages=context + code_session_messages,
+                            prompt_for_code_invoke=prompt_for_code_invoke,
+                            function_reference_prefix=function_reference_prefix,
+                            function_reference_seperator=function_reference_seperator,
+                        ),
+                    )
+                    review_on_code_execution = decision.review_on_code_execution.strip()
+                    next_action = decision.next_action.strip()
+                    console.print("[bold blue]AI:[/bold blue]")
+                    console.print(f"-[bold yellow]Review on code execution:[/bold yellow] {review_on_code_execution}")
+                    console.print(f"-[bold yellow]Next Action:[/bold yellow] {next_action}")
+                    code_session_messages.append(
+                        AIMessage(
+                            content=f"- Review upon code execution: {review_on_code_execution}\n- Next Action: {next_action}".strip()
+                        )
+                    )
+                    if not decision.is_further_code_execution_needed:
+                        tool_use_message = code_execution_message
+                        break
        else:
-
-
-
-
-
-
-
-
-            response =
+            tool_use_message = None
+
+        # Add the code execution result to the context
+        if tool_use_message:
+            context.append(tool_use_message)
+
+        # Stream the AI response
+        console.print("[bold blue]AI:[/bold blue] ", end="")
+        response = ""
+        for chunk in chatterer.generate_stream(messages=context):
+            response += chunk
+            console.print(chunk, end="")
+
+        # After streaming, post-process the full response and add it to the context
+        lines = response.split("\n")
+        if lines:
+            lines[-1] = lines[-1].rstrip()  # Strip trailing whitespace from the last line
+        response = "\n".join(lines).strip()
        context.append(AIMessage(content=response))
-        console.print(
+        console.print()  # Print a newline after the response
 
 
 if __name__ == "__main__":
-
+    interactive_shell()
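The headline change in this file is that the example `chatbot_example` function has been replaced by a public `interactive_shell` entry point, which wires the REPL tool, optional user-supplied callables, and the new `augment_prompt_for_toolcall` helper into a Rich-based chat loop. A minimal usage sketch based only on the signatures above, assuming an OpenAI-backed model is already configured; the `add` helper is hypothetical:

from chatterer import Chatterer, interactive_shell


def add(a: int, b: int) -> int:
    """Hypothetical helper exposed to the code-execution REPL."""
    return a + b


# Starts the chat loop; the signature of `add` is rendered into the prompt via
# FunctionSignature.from_callable(additional_callables) so generated code can call it.
interactive_shell(
    chatterer=Chatterer.openai(),
    additional_callables=[add],
)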
chatterer/tools/__init__.py
CHANGED
@@ -6,6 +6,7 @@ from .convert_to_text import (
     pdf_to_text,
     pyscripts_to_snippets,
 )
+from .youtube import get_youtube_video_subtitle, get_youtube_video_details
 
 
 def init_webpage_to_markdown():
@@ -22,4 +23,6 @@ __all__ = [
     "pyscripts_to_snippets",
     "citation_chunker",
     "init_webpage_to_markdown",
+    "get_youtube_video_subtitle",
+    "get_youtube_video_details",
 ]
chatterer/tools/youtube.py
ADDED
@@ -0,0 +1,146 @@
+import json
+import unicodedata
+import urllib.parse
+from dataclasses import dataclass
+from typing import Any, Optional, Self, cast
+
+import requests
+
+
+def get_youtube_video_details(
+    query: str,
+) -> list[dict[str, Optional[str]]]:
+    """Search for video metadata on YouTube using the given query. Returns a list of dictionaries containing `video_id`, `title`, `channel`, `duration`, `views`, `publish_time`, and `long_desc`."""
+    return [
+        {
+            "video_id": video_id,
+            "title": video.title,
+            "channel": video.channel,
+            "duration": video.duration,
+            "views": video.views,
+            "publish_time": video.publish_time,
+            "long_desc": video.long_desc,
+        }
+        for video in YoutubeSearchResult.from_query(base_url="https://youtube.com", query=query, max_results=10)
+        if (video_id := _get_video_id(video.url_suffix))
+    ]
+
+
+def get_youtube_video_subtitle(video_id: str) -> str:
+    """Get the transcript of a YouTube video using the given video ID."""
+
+    from youtube_transcript_api._api import YouTubeTranscriptApi
+
+    get_transcript = YouTubeTranscriptApi.get_transcript  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    list_transcripts = YouTubeTranscriptApi.list_transcripts  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+
+    result: str = ""
+    buffer_timestamp: str = "0s"
+    buffer_texts: list[str] = []
+    for entry in get_transcript(video_id, languages=(next(iter(list_transcripts(video_id))).language_code,)):  # pyright: ignore[reportUnknownVariableType]
+        entry = cast(dict[object, object], entry)
+        text: str = str(entry.get("text", "")).strip().replace("\n", " ")
+        if not text:
+            continue
+        if len(buffer_texts) >= 10 or _is_special_char(text) or (buffer_texts and _is_special_char(buffer_texts[-1])):
+            result += f"[{buffer_timestamp}] {'. '.join(buffer_texts)}\n"
+            start = entry.get("start", 0)
+            if start:
+                buffer_timestamp = f"{start:.0f}s"
+            buffer_texts = [text]
+        else:
+            buffer_texts.append(text)
+
+    if buffer_texts:
+        result += f"[{buffer_timestamp}] {' '.join(buffer_texts)}"
+    return result
+
+
+def _get_video_id(suffix: str) -> str:
+    urllib_parse_result = urllib.parse.urlparse(suffix)
+    if urllib_parse_result.path.startswith("/shorts/"):
+        # For shorts (/shorts/...) the video ID is in the path
+        parts = urllib_parse_result.path.split("/")
+        if len(parts) < 3:
+            print(f"Failed to get video ID from {suffix}")
+            return ""
+        return parts[2]
+
+    query: str = urllib.parse.urlparse(suffix).query
+    query_strings = urllib.parse.parse_qs(query)
+    if "v" not in query_strings:
+        print(f"Failed to get video ID from {suffix}")
+        return ""
+    return next(iter(query_strings["v"]), "")
+
+
+def _is_special_char(text: str) -> bool:
+    if not text:
+        return False
+    return not unicodedata.category(text[0]).startswith("L")
+
+
+@dataclass
+class YoutubeSearchResult:
+    url_suffix: str
+    id: Optional[str]
+    thumbnails: list[str]
+    title: Optional[str]
+    long_desc: Optional[str]
+    channel: Optional[str]
+    duration: Optional[str]
+    views: Optional[str]
+    publish_time: Optional[str]
+
+    @classmethod
+    def from_query(cls, base_url: str, query: str, max_results: int) -> list[Self]:
+        url: str = f"{base_url}/results?search_query={urllib.parse.quote_plus(query)}"
+        response: str = requests.get(url).text
+        while "ytInitialData" not in response:
+            response = requests.get(url).text
+        results: list[Self] = cls.parse_html(response)
+        return results[:max_results]
+
+    @classmethod
+    def parse_html(cls, html: str) -> list[Self]:
+        results: list[Self] = []
+        start: int = html.index("ytInitialData") + len("ytInitialData") + 3
+        end: int = html.index("};", start) + 1
+        data: Any = json.loads(html[start:end])
+        for contents in data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"][
+            "contents"
+        ]:
+            for video in contents["itemSectionRenderer"]["contents"]:
+                if "videoRenderer" in video.keys():
+                    video_data = video.get("videoRenderer", {})
+                    suffix = (
+                        video_data.get("navigationEndpoint", {})
+                        .get("commandMetadata", {})
+                        .get("webCommandMetadata", {})
+                        .get("url", None)
+                    )
+                    if not suffix:
+                        continue
+                    res = cls(
+                        id=video_data.get("videoId", None),
+                        thumbnails=[
+                            thumb.get("url", None) for thumb in video_data.get("thumbnail", {}).get("thumbnails", [{}])
+                        ],
+                        title=video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None),
+                        long_desc=video_data.get("descriptionSnippet", {}).get("runs", [{}])[0].get("text", None),
+                        channel=video_data.get("longBylineText", {}).get("runs", [[{}]])[0].get("text", None),
+                        duration=video_data.get("lengthText", {}).get("simpleText", 0),
+                        views=video_data.get("viewCountText", {}).get("simpleText", 0),
+                        publish_time=video_data.get("publishedTimeText", {}).get("simpleText", 0),
+                        url_suffix=suffix,
+                    )
+                    results.append(res)
+
+            if results:
+                break
+        return results
+
+
+if __name__ == "__main__":
+    print(get_youtube_video_details("BTS"))
+    # print(get_youtube_transcript("y7jrpS8GHxs"))
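Both helpers are re-exported from `chatterer.tools` (see the `tools/__init__.py` diff above). A short sketch of how they compose, based only on the signatures and dictionary keys visible in this diff; the search query is illustrative:

from chatterer.tools import get_youtube_video_details, get_youtube_video_subtitle

# Search returns a list of dicts with keys such as "video_id", "title", "channel", "views".
videos = get_youtube_video_details("python asyncio tutorial")
for video in videos[:3]:
    print(video["title"], video["video_id"])

# Fetch the timestamped transcript of the first hit; this path requires the
# youtube-transcript-api dependency added to the "conversion" extra in this release.
if videos and videos[0]["video_id"]:
    print(get_youtube_video_subtitle(videos[0]["video_id"]))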
chatterer/utils/code_agent.py
CHANGED
@@ -21,7 +21,15 @@ class FunctionSignature(NamedTuple):
     signature: str
 
     @classmethod
-    def from_callable(cls,
+    def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
+        if callables is None:
+            return []
+        if callable(callables):
+            return [cls._from_callable(callables)]
+        return [cls._from_callable(callable) for callable in callables]
+
+    @classmethod
+    def _from_callable(cls, callable: Callable[..., object]) -> Self:
         """
         Get the name and signature of a function as a string.
         """
@@ -54,21 +62,17 @@ class FunctionSignature(NamedTuple):
         else:
             return cls(name=function_name, callable=callable, signature=signature)
 
-    @classmethod
-    def from_callables(cls, callables: Iterable[Callable[..., object]]) -> list[Self]:
-        return [cls.from_callable(callable) for callable in callables]
-
     @classmethod
     def as_prompt(
         cls,
-
+        function_signatures: Iterable[Self],
         prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
         sep: str = "\n---\n",
     ) -> str:
         """
         Generate a prompt string from a list of callables.
         """
-        body: str = sep.join(fsig.signature for fsig in
+        body: str = sep.join(fsig.signature for fsig in function_signatures)
         if prefix:
             return f"{prefix}{body}"
         return body
@@ -92,7 +96,7 @@ class CodeExecutionResult(NamedTuple):
         """
         if repl_tool is None:
             repl_tool = get_default_repl_tool()
-        if function_signatures
+        if function_signatures:
             insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
         output = str(repl_tool.invoke(code, config=config, **kwargs))  # pyright: ignore[reportUnknownMemberType]
         return cls(code=code, output=output)
@@ -111,7 +115,7 @@ class CodeExecutionResult(NamedTuple):
         """
         if repl_tool is None:
             repl_tool = get_default_repl_tool()
-        if function_signatures
+        if function_signatures:
             insert_callables_into_global(function_signatures=function_signatures, repl_tool=repl_tool)
         output = str(await repl_tool.ainvoke(code, config=config, **kwargs))  # pyright: ignore[reportUnknownMemberType]
         return cls(code=code, output=output)
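The refactor folds the old `from_callables` classmethod into `from_callable`, which now accepts `None`, a single callable, or an iterable of callables and always returns a list of signatures; the per-callable logic moves to the private `_from_callable`. A small sketch of the new calling convention, using a made-up `greet` function:

from chatterer.utils.code_agent import FunctionSignature


def greet(name: str) -> str:
    """Example callable whose signature gets rendered into the tool prompt."""
    return f"Hello, {name}!"


assert FunctionSignature.from_callable(None) == []       # None -> empty list
single = FunctionSignature.from_callable(greet)          # one callable -> one-element list
many = FunctionSignature.from_callable([greet, print])   # iterable -> list
print(FunctionSignature.as_prompt(single + many))        # entries joined with "\n---\n" by default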
chatterer/utils/image.py
CHANGED
@@ -64,7 +64,7 @@ class Base64Image(BaseModel):
 
     IMAGE_TYPES: ClassVar[tuple[str, ...]] = tuple(map(str, get_args(ImageType)))
     IMAGE_PATTERN: ClassVar[re.Pattern[str]] = re.compile(
-
+        r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})"
     )
 
     def __hash__(self) -> int:
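The only change here is that `IMAGE_PATTERN` is rebuilt by joining the allowed image types into a data-URI regex. A standalone sketch of what the pattern matches; the concrete `IMAGE_TYPES` tuple below is an assumption, since the real one is derived from `get_args(ImageType)`:

import re

IMAGE_TYPES = ("png", "jpeg", "gif", "webp")  # assumption: illustrative subset only
IMAGE_PATTERN = re.compile(r"data:image/(" + "|".join(IMAGE_TYPES) + r");base64,([A-Za-z0-9+/]+={0,2})")

match = IMAGE_PATTERN.match("data:image/png;base64,iVBORw0KGgo=")
if match:
    print(match.group(1))  # image type, e.g. "png"
    print(match.group(2))  # the base64 payload (including any '=' padding)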
{chatterer-0.1.9.dist-info → chatterer-0.1.11.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: chatterer
-Version: 0.1.9
+Version: 0.1.11
 Summary: The highest-level interface for various LLM APIs.
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
@@ -18,6 +18,7 @@ Requires-Dist: pillow>=11.1.0; extra == "conversion"
 Requires-Dist: mistune>=3.1.2; extra == "conversion"
 Requires-Dist: markitdown>=0.0.2; extra == "conversion"
 Requires-Dist: pymupdf>=1.25.4; extra == "conversion"
+Requires-Dist: youtube-transcript-api>=1.0.2; extra == "conversion"
 Provides-Extra: langchain
 Requires-Dist: chatterer[langchain-providers]; extra == "langchain"
 Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
{chatterer-0.1.9.dist-info → chatterer-0.1.11.dist-info}/RECORD
CHANGED
@@ -1,12 +1,13 @@
-chatterer/__init__.py,sha256=
-chatterer/language_model.py,sha256=
+chatterer/__init__.py,sha256=BPgCQ6VWGBXSh8xJr_0bpM0hcOOUz0KoxcKxOd9GYyI,1388
+chatterer/language_model.py,sha256=DX_mU855JHHqE0gdnieWZNOwX1BjIO4VK4EightRL3w,24353
 chatterer/messages.py,sha256=OtbZ3two0LUQ4PXES97FDIBUSO3IcMHdFV1VFkDL2mI,229
 chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
 chatterer/strategies/atom_of_thoughts.py,sha256=CygOCLu5vLk-fzY9O-iE3qLShfjD7iY40ks9jH4ULBM,40872
 chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
-chatterer/tools/__init__.py,sha256=
+chatterer/tools/__init__.py,sha256=hmWIuLJWotGQodL__i4LLbHdXe7Nl5uKHqNke9tHMro,705
 chatterer/tools/convert_to_text.py,sha256=kBqxCJ0IoiAw2eiPYqep_SPZm-TtYKF7mdACLsWQUuI,15915
+chatterer/tools/youtube.py,sha256=GhyE05JBF_eos01A_N-X5tZv4wQJ--IjErBbEBeNBpQ,6037
 chatterer/tools/citation_chunking/__init__.py,sha256=gG7Fnkkp28UpcWMbfMY_4gqzZSZ8QzlhalHBoeoq7K0,82
 chatterer/tools/citation_chunking/chunks.py,sha256=50Dpa43RaYftlNox8tM1qI8htZ3_AJ9Uyyn02WsmxYk,2173
 chatterer/tools/citation_chunking/citation_chunker.py,sha256=yx5O9pUkowlNcFyyNf7f3sbq7-CV8AXOzFnviDldPR8,4894
@@ -18,9 +19,9 @@ chatterer/tools/webpage_to_markdown/__init__.py,sha256=bHH4qfnXyw8Zz-yBPLaTezF1s
 chatterer/tools/webpage_to_markdown/playwright_bot.py,sha256=yP0KixYZNQ4Kn_ZCFDI3mVyBD_DpUGfqgklpaGJUTCU,27496
 chatterer/tools/webpage_to_markdown/utils.py,sha256=ZLUU94imYciEdynD2K7Dmcsbt8BVQTaOP56Ba6DAFvk,12593
 chatterer/utils/__init__.py,sha256=8nzpFJKU_wSRPH6LBP6HRBotPMrSl_VO9UlmFprTrK0,334
-chatterer/utils/code_agent.py,sha256=
-chatterer/utils/image.py,sha256=
-chatterer-0.1.
-chatterer-0.1.
-chatterer-0.1.
-chatterer-0.1.
+chatterer/utils/code_agent.py,sha256=UaWdeGzJMPzRSFy9yrxuveBJsvOPSa0te6OuE18bees,5143
+chatterer/utils/image.py,sha256=1imiyq6TB9NIIGx3zAA2OwMWuXlifYIAjwfWRWa4WIM,10858
+chatterer-0.1.11.dist-info/METADATA,sha256=S3hRkxG1DlFc_NGrra1xhniiCDDVoVrow2N96OJy8i0,4458
+chatterer-0.1.11.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
+chatterer-0.1.11.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
+chatterer-0.1.11.dist-info/RECORD,,
{chatterer-0.1.9.dist-info → chatterer-0.1.11.dist-info}/top_level.txt
File without changes