chatterer 0.1.8__py3-none-any.whl → 0.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/__init__.py +22 -1
- chatterer/language_model.py +287 -78
- chatterer/messages.py +2 -1
- chatterer/tools/__init__.py +11 -0
- chatterer/tools/youtube.py +132 -0
- chatterer/utils/__init__.py +15 -0
- chatterer/utils/code_agent.py +138 -0
- {chatterer-0.1.8.dist-info → chatterer-0.1.10.dist-info}/METADATA +6 -2
- {chatterer-0.1.8.dist-info → chatterer-0.1.10.dist-info}/RECORD +11 -8
- {chatterer-0.1.8.dist-info → chatterer-0.1.10.dist-info}/WHEEL +0 -0
- {chatterer-0.1.8.dist-info → chatterer-0.1.10.dist-info}/top_level.txt +0 -0
chatterer/__init__.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1
|
-
from .language_model import Chatterer
|
1
|
+
from .language_model import Chatterer, interactive_shell
|
2
2
|
from .messages import (
|
3
3
|
AIMessage,
|
4
4
|
BaseMessage,
|
5
|
+
FunctionMessage,
|
5
6
|
HumanMessage,
|
6
7
|
SystemMessage,
|
7
8
|
)
|
@@ -15,9 +16,19 @@ from .tools import (
|
|
15
16
|
anything_to_markdown,
|
16
17
|
citation_chunker,
|
17
18
|
get_default_html_to_markdown_options,
|
19
|
+
get_youtube_video_subtitle,
|
18
20
|
html_to_markdown,
|
21
|
+
init_webpage_to_markdown,
|
19
22
|
pdf_to_text,
|
20
23
|
pyscripts_to_snippets,
|
24
|
+
get_youtube_video_details,
|
25
|
+
)
|
26
|
+
from .utils import (
|
27
|
+
Base64Image,
|
28
|
+
CodeExecutionResult,
|
29
|
+
FunctionSignature,
|
30
|
+
get_default_repl_tool,
|
31
|
+
insert_callables_into_global,
|
21
32
|
)
|
22
33
|
|
23
34
|
__all__ = [
|
@@ -36,4 +47,14 @@ __all__ = [
|
|
36
47
|
"HumanMessage",
|
37
48
|
"SystemMessage",
|
38
49
|
"AIMessage",
|
50
|
+
"FunctionMessage",
|
51
|
+
"Base64Image",
|
52
|
+
"init_webpage_to_markdown",
|
53
|
+
"FunctionSignature",
|
54
|
+
"CodeExecutionResult",
|
55
|
+
"get_default_repl_tool",
|
56
|
+
"insert_callables_into_global",
|
57
|
+
"get_youtube_video_subtitle",
|
58
|
+
"get_youtube_video_details",
|
59
|
+
"interactive_shell",
|
39
60
|
]
|
chatterer/language_model.py
CHANGED
@@ -2,9 +2,12 @@ from typing import (
|
|
2
2
|
TYPE_CHECKING,
|
3
3
|
Any,
|
4
4
|
AsyncIterator,
|
5
|
+
Callable,
|
6
|
+
Iterable,
|
5
7
|
Iterator,
|
6
8
|
Optional,
|
7
9
|
Self,
|
10
|
+
Sequence,
|
8
11
|
Type,
|
9
12
|
TypeAlias,
|
10
13
|
TypeVar,
|
@@ -18,15 +21,39 @@ from langchain_core.runnables.base import Runnable
|
|
18
21
|
from langchain_core.runnables.config import RunnableConfig
|
19
22
|
from pydantic import BaseModel, Field
|
20
23
|
|
21
|
-
from .messages import AIMessage, BaseMessage, HumanMessage
|
24
|
+
from .messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
|
25
|
+
from .utils.code_agent import CodeExecutionResult, FunctionSignature, get_default_repl_tool
|
22
26
|
|
23
27
|
if TYPE_CHECKING:
|
24
28
|
from instructor import Partial
|
29
|
+
from langchain_experimental.tools.python.tool import PythonAstREPLTool
|
25
30
|
|
26
31
|
PydanticModelT = TypeVar("PydanticModelT", bound=BaseModel)
|
27
32
|
StructuredOutputType: TypeAlias = dict[object, object] | BaseModel
|
28
33
|
|
29
|
-
DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION = "
|
34
|
+
DEFAULT_IMAGE_DESCRIPTION_INSTRUCTION = "Provide a detailed description of all visible elements in the image, summarizing key details in a few clear sentences."
|
35
|
+
DEFAULT_CODE_GENERATION_PROMPT = (
|
36
|
+
"You are utilizing a Python code execution tool now.\n"
|
37
|
+
"Your goal is to generate Python code that solves the task efficiently and appends both the code and its output to your context memory.\n"
|
38
|
+
"Since your context window is highly limited, type `pass` if no code execution is needed.\n"
|
39
|
+
"\n"
|
40
|
+
"To optimize tool efficiency, follow these guidelines:\n"
|
41
|
+
"- Write concise, efficient code that directly serves the intended purpose.\n"
|
42
|
+
"- Avoid unnecessary operations (e.g., excessive loops, recursion, or heavy computations).\n"
|
43
|
+
"- Handle potential errors gracefully (e.g., using try-except blocks).\n"
|
44
|
+
"- Prevent excessive output by limiting print statements to essential information only (e.g., avoid printing large datasets).\n"
|
45
|
+
"\n"
|
46
|
+
"Return your response strictly in the following JSON format:\n"
|
47
|
+
'{\n "code": "<your_python_code_here>"\n}\n\n'
|
48
|
+
)
|
49
|
+
|
50
|
+
|
51
|
+
DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT = (
|
52
|
+
"Below functions are included in global scope and can be used in your code.\n"
|
53
|
+
"Do not try to redefine the function(s).\n"
|
54
|
+
"You don't have to force yourself to use these tools - use them only when you need to.\n"
|
55
|
+
)
|
56
|
+
DEFAULT_FUNCTION_REFERENCE_SEPARATOR = "\n---\n" # Separator to distinguish different function references
|
30
57
|
|
31
58
|
|
32
59
|
class Chatterer(BaseModel):
|
@@ -162,7 +189,7 @@ class Chatterer(BaseModel):
|
|
162
189
|
stop: Optional[list[str]] = None,
|
163
190
|
**kwargs: Any,
|
164
191
|
) -> PydanticModelT:
|
165
|
-
result: StructuredOutputType =
|
192
|
+
result: StructuredOutputType = _with_structured_output(
|
166
193
|
client=self.client,
|
167
194
|
response_model=response_model,
|
168
195
|
structured_output_kwargs=self.structured_output_kwargs,
|
@@ -180,7 +207,7 @@ class Chatterer(BaseModel):
|
|
180
207
|
stop: Optional[list[str]] = None,
|
181
208
|
**kwargs: Any,
|
182
209
|
) -> PydanticModelT:
|
183
|
-
result: StructuredOutputType = await
|
210
|
+
result: StructuredOutputType = await _with_structured_output(
|
184
211
|
client=self.client,
|
185
212
|
response_model=response_model,
|
186
213
|
structured_output_kwargs=self.structured_output_kwargs,
|
@@ -204,7 +231,7 @@ class Chatterer(BaseModel):
|
|
204
231
|
raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
|
205
232
|
|
206
233
|
partial_response_model = instructor.Partial[response_model]
|
207
|
-
for chunk in
|
234
|
+
for chunk in _with_structured_output(
|
208
235
|
client=self.client,
|
209
236
|
response_model=partial_response_model,
|
210
237
|
structured_output_kwargs=self.structured_output_kwargs,
|
@@ -225,7 +252,7 @@ class Chatterer(BaseModel):
|
|
225
252
|
raise ImportError("Please install `instructor` with `pip install instructor` to use this feature.")
|
226
253
|
|
227
254
|
partial_response_model = instructor.Partial[response_model]
|
228
|
-
async for chunk in
|
255
|
+
async for chunk in _with_structured_output(
|
229
256
|
client=self.client,
|
230
257
|
response_model=partial_response_model,
|
231
258
|
structured_output_kwargs=self.structured_output_kwargs,
|
@@ -288,8 +315,77 @@ class Chatterer(BaseModel):
|
|
288
315
|
except Exception:
|
289
316
|
return None
|
290
317
|
|
318
|
+
def invoke_code_execution(
    self,
    messages: LanguageModelInput,
    repl_tool: Optional["PythonAstREPLTool"] = None,
    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
    function_signatures: Optional[FunctionSignature | Iterable[FunctionSignature]] = None,
    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
    config: Optional[RunnableConfig] = None,
    stop: Optional[list[str]] = None,
    **kwargs: Any,
) -> CodeExecutionResult:
    """Ask the model for Python code and run it in a REPL tool.

    The messages are augmented with the function reference and the code
    generation prompt, the model is asked for a ``PythonCodeToExecute``, and
    the generated code is handed to ``CodeExecutionResult.from_code``.

    Args:
        messages: Conversation to base the code generation on.
        repl_tool: REPL tool forwarded to ``CodeExecutionResult.from_code``.
        prompt_for_code_invoke: Instruction appended after the messages;
            nothing is appended when it is empty or ``None``.
        function_signatures: A single signature or an iterable of signatures
            advertised to the model and forwarded to the REPL.
        function_reference_prefix: Text placed before the function reference.
        function_reference_seperator: Text placed between function references.
        config: Runnable config forwarded to the model and to the REPL tool.
        stop: Stop sequences forwarded to the model.
        **kwargs: Forwarded to both the model call and the REPL invocation.

    Returns:
        The generated code together with its execution output.
    """
    # A FunctionSignature is itself a tuple, so it must be recognised before
    # the generic iterable case. Materialising to a list matters because the
    # signatures are consumed twice (prompt building and REPL injection); a
    # one-shot iterator would be empty the second time.
    if function_signatures is None:
        signatures: list[FunctionSignature] = []
    elif isinstance(function_signatures, FunctionSignature):
        signatures = [function_signatures]
    else:
        signatures = list(function_signatures)

    messages = augment_prompt_for_toolcall(
        function_signatures=signatures,
        messages=messages,
        prompt_for_code_invoke=prompt_for_code_invoke,
        function_reference_prefix=function_reference_prefix,
        function_reference_seperator=function_reference_seperator,
    )
    code_obj: PythonCodeToExecute = self.generate_pydantic(
        response_model=PythonCodeToExecute, messages=messages, config=config, stop=stop, **kwargs
    )
    return CodeExecutionResult.from_code(
        code=code_obj.code,
        config=config,
        repl_tool=repl_tool,
        function_signatures=signatures,
        **kwargs,
    )
|
351
|
+
|
352
|
+
async def ainvoke_code_execution(
    self,
    messages: LanguageModelInput,
    repl_tool: Optional["PythonAstREPLTool"] = None,
    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
    additional_callables: Optional[Callable[..., object] | Sequence[Callable[..., object]]] = None,
    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
    config: Optional[RunnableConfig] = None,
    stop: Optional[list[str]] = None,
    **kwargs: Any,
) -> CodeExecutionResult:
    """Asynchronously ask the model for Python code and run it in a REPL tool.

    ``additional_callables`` are converted to ``FunctionSignature`` objects,
    advertised to the model through the augmented prompt, and forwarded to
    ``CodeExecutionResult.afrom_code`` together with the generated code.
    ``config`` and ``**kwargs`` go to both the model call and the REPL call.
    """
    signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)

    # Build the prompt first, then await the structured generation.
    augmented_messages = augment_prompt_for_toolcall(
        function_signatures=signatures,
        messages=messages,
        prompt_for_code_invoke=prompt_for_code_invoke,
        function_reference_prefix=function_reference_prefix,
        function_reference_seperator=function_reference_seperator,
    )
    generated: PythonCodeToExecute = await self.agenerate_pydantic(
        response_model=PythonCodeToExecute,
        messages=augmented_messages,
        config=config,
        stop=stop,
        **kwargs,
    )

    execution = await CodeExecutionResult.afrom_code(
        code=generated.code,
        config=config,
        repl_tool=repl_tool,
        function_signatures=signatures,
        **kwargs,
    )
    return execution
|
382
|
+
|
383
|
+
|
384
|
+
class PythonCodeToExecute(BaseModel):
    """Structured-output schema holding the Python code the model wants to run."""

    code: str = Field(description="Python code to execute")
|
291
386
|
|
292
|
-
|
387
|
+
|
388
|
+
def _with_structured_output(
|
293
389
|
client: BaseChatModel,
|
294
390
|
response_model: Type["PydanticModelT | Partial[PydanticModelT]"],
|
295
391
|
structured_output_kwargs: dict[str, Any],
|
@@ -297,75 +393,188 @@ def with_structured_output(
|
|
297
393
|
return client.with_structured_output(schema=response_model, **structured_output_kwargs) # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
|
298
394
|
|
299
395
|
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
396
|
+
def _add_message_last(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
    """Return ``messages`` with ``prompt_to_add`` appended at the end.

    A plain string gets the prompt appended on a new line. Any other input is
    turned into a list of messages (copying a sequence, or calling
    ``to_messages()`` otherwise) and receives a trailing ``SystemMessage``.
    """
    if isinstance(messages, str):
        return messages + f"\n{prompt_to_add}"

    # Copy sequences so the caller's list is never mutated.
    history = list(messages) if isinstance(messages, Sequence) else messages.to_messages()
    history.append(SystemMessage(content=prompt_to_add))
    return history
|
406
|
+
|
407
|
+
|
408
|
+
# def _add_message_first(messages: LanguageModelInput, prompt_to_add: str) -> LanguageModelInput:
|
409
|
+
# if isinstance(messages, str):
|
410
|
+
# messages = f"{prompt_to_add}\n{messages}"
|
411
|
+
# elif isinstance(messages, Sequence):
|
412
|
+
# messages = list(messages)
|
413
|
+
# messages.insert(0, SystemMessage(content=prompt_to_add))
|
414
|
+
# else:
|
415
|
+
# messages = messages.to_messages()
|
416
|
+
# messages.insert(0, SystemMessage(content=prompt_to_add))
|
417
|
+
# return messages
|
418
|
+
|
419
|
+
|
420
|
+
def augment_prompt_for_toolcall(
    function_signatures: Iterable[FunctionSignature],
    messages: LanguageModelInput,
    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
) -> LanguageModelInput:
    """Append the function reference and the code-generation prompt to ``messages``.

    Args:
        function_signatures: Signatures to advertise; nothing is appended for
            them when the iterable yields no items.
        messages: Conversation to extend.
        prompt_for_code_invoke: Instruction appended last; skipped when empty
            or ``None``.
        function_reference_prefix: Text placed before the function reference.
        function_reference_seperator: Text placed between function references.

    Returns:
        The extended messages, as produced by ``_add_message_last``.
    """
    # Materialise first: a generator is always truthy, so testing the raw
    # iterable would append a bare prefix even when there are no signatures.
    signatures = list(function_signatures)
    if signatures:
        messages = _add_message_last(
            messages=messages,
            prompt_to_add=FunctionSignature.as_prompt(
                signatures, function_reference_prefix, function_reference_seperator
            ),
        )
    if prompt_for_code_invoke:
        messages = _add_message_last(messages=messages, prompt_to_add=prompt_for_code_invoke)
    return messages
|
437
|
+
|
438
|
+
|
439
|
+
def interactive_shell(
    chatterer: Optional[Chatterer] = None,
    system_instruction: BaseMessage | Iterable[BaseMessage] = (
        SystemMessage("You are an AI that can answer questions and execute Python code."),
    ),
    repl_tool: Optional["PythonAstREPLTool"] = None,
    prompt_for_code_invoke: Optional[str] = DEFAULT_CODE_GENERATION_PROMPT,
    additional_callables: Optional[Callable[..., object] | Sequence[Callable[..., object]]] = None,
    function_reference_prefix: Optional[str] = DEFAULT_FUNCTION_REFERENCE_PREFIX_PROMPT,
    function_reference_seperator: str = DEFAULT_FUNCTION_REFERENCE_SEPARATOR,
    config: Optional[RunnableConfig] = None,
    stop: Optional[list[str]] = None,
    **kwargs: Any,
) -> None:
    """Run an interactive chat loop in which the model may execute Python code.

    For every user turn the model first decides whether code execution is
    needed. If so, code is generated and executed repeatedly, each round
    followed by a model review, until the model reports that no further
    execution is needed or answers with ``pass``. The most recent execution
    result is added to the conversation before the final answer is generated.
    The loop ends when the user types ``quit`` or ``exit``.

    Args:
        chatterer: Model wrapper to talk to. When ``None``, ``Chatterer.openai()``
            is created on first use instead of at import time.
        system_instruction: Message(s) placed at the start of the conversation.
        repl_tool: REPL tool shared by all executions; a default one is created
            when ``None``.
        prompt_for_code_invoke: Instruction appended when asking for code.
        additional_callables: Callables advertised to the model and passed to
            the code execution step.
        function_reference_prefix: Text placed before the function reference.
        function_reference_seperator: Text placed between function references.
        config: Forwarded to ``invoke_code_execution``.
        stop: Forwarded to ``invoke_code_execution``.
        **kwargs: Forwarded to ``invoke_code_execution``.
    """
    from rich.console import Console
    from rich.prompt import Prompt

    class IsCodeExecutionNeeded(BaseModel):
        is_code_execution_needed: bool = Field(
            description="Whether Python tool calling is needed to answer user query."
        )

    class IsFurtherCodeExecutionNeeded(BaseModel):
        review_on_code_execution: str = Field(description="Review on the code execution.")
        next_action: str = Field(description="Next action to take.")
        is_further_code_execution_needed: bool = Field(
            description="Whether further Python tool calling is needed to answer user query."
        )

    # Build the default model lazily so that importing this module does not
    # construct a client as a side effect.
    if chatterer is None:
        chatterer = Chatterer.openai()

    # One REPL tool is reused for every execution in this session.
    if repl_tool is None:
        repl_tool = get_default_repl_tool()

    function_signatures: list[FunctionSignature] = FunctionSignature.from_callable(additional_callables)

    console = Console()

    # Conversation history, seeded with the system instruction(s).
    context: list[BaseMessage] = []
    if system_instruction:
        if isinstance(system_instruction, BaseMessage):
            context.append(system_instruction)
        else:
            context.extend(system_instruction)

    console.print("[bold blue]Welcome to the Interactive Chatterer Shell![/bold blue]")
    console.print("Type 'quit' or 'exit' to end the conversation.")

    while True:
        user_input = Prompt.ask("[bold green]You[/bold green]")
        if user_input.lower() in ["quit", "exit"]:
            console.print("[bold blue]Goodbye![/bold blue]")
            break

        context.append(HumanMessage(content=user_input))

        # Ask the model whether this turn needs code execution at all.
        decision = chatterer.generate_pydantic(
            response_model=IsCodeExecutionNeeded,
            messages=augment_prompt_for_toolcall(
                function_signatures=function_signatures,
                messages=context,
                prompt_for_code_invoke=prompt_for_code_invoke,
                function_reference_prefix=function_reference_prefix,
                function_reference_seperator=function_reference_seperator,
            ),
        )

        tool_use_message: Optional[SystemMessage] = None
        if decision.is_code_execution_needed:
            code_session_messages: list[BaseMessage] = []
            while True:
                # Generate fresh code every round, including the results and
                # reviews of earlier rounds, so a "further execution needed"
                # verdict actually leads to new code being run.
                code_result = chatterer.invoke_code_execution(
                    messages=context + code_session_messages,
                    repl_tool=repl_tool,
                    prompt_for_code_invoke=prompt_for_code_invoke,
                    function_signatures=function_signatures,
                    function_reference_prefix=function_reference_prefix,
                    function_reference_seperator=function_reference_seperator,
                    config=config,
                    stop=stop,
                    **kwargs,
                )

                # The prompt tells the model to answer `pass` when nothing
                # needs to be executed.
                if code_result.code.strip() == "pass":
                    break

                code_execution_message = SystemMessage(
                    content=f"Executed code:\n```python\n{code_result.code}\n```\nOutput:\n{code_result.output}"
                )
                code_session_messages.append(code_execution_message)
                tool_use_message = code_execution_message
                console.print("[bold yellow]Executed code:[/bold yellow]")
                console.print(f"[code]{code_result.code}[/code]")
                console.print("[bold yellow]Output:[/bold yellow]")
                console.print(code_result.output)

                review = chatterer.generate_pydantic(
                    response_model=IsFurtherCodeExecutionNeeded,
                    messages=augment_prompt_for_toolcall(
                        function_signatures=function_signatures,
                        messages=context + code_session_messages,
                        prompt_for_code_invoke=prompt_for_code_invoke,
                        function_reference_prefix=function_reference_prefix,
                        function_reference_seperator=function_reference_seperator,
                    ),
                )
                console.print("[bold blue]AI:[/bold blue]")
                console.print(
                    f"-[bold yellow]Review on code execution:[/bold yellow] {review.review_on_code_execution}"
                )
                console.print(f"-[bold yellow]Next Action:[/bold yellow] {review.next_action}")
                code_session_messages.append(
                    AIMessage(
                        content=f"- Review upon code execution: {review.review_on_code_execution}\n- Next Action: {review.next_action}"
                    )
                )
                if not review.is_further_code_execution_needed:
                    break

        # Only the latest execution result is kept in the long-lived context.
        if tool_use_message:
            context.append(tool_use_message)

        response = chatterer.generate(messages=context)
        context.append(AIMessage(content=response))
        console.print(f"[bold blue]AI:[/bold blue] {response}")
|
577
|
+
|
578
|
+
|
579
|
+
if __name__ == "__main__":
|
580
|
+
interactive_shell()
|
chatterer/messages.py
CHANGED
@@ -1,8 +1,9 @@
|
|
1
|
-
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage
|
1
|
+
from langchain_core.messages import AIMessage, BaseMessage, FunctionMessage, HumanMessage, SystemMessage
|
2
2
|
|
3
3
|
__all__ = [
|
4
4
|
"AIMessage",
|
5
5
|
"BaseMessage",
|
6
6
|
"HumanMessage",
|
7
7
|
"SystemMessage",
|
8
|
+
"FunctionMessage",
|
8
9
|
]
|
chatterer/tools/__init__.py
CHANGED
@@ -6,6 +6,14 @@ from .convert_to_text import (
|
|
6
6
|
pdf_to_text,
|
7
7
|
pyscripts_to_snippets,
|
8
8
|
)
|
9
|
+
from .youtube import get_youtube_video_subtitle, get_youtube_video_details
|
10
|
+
|
11
|
+
|
12
|
+
def init_webpage_to_markdown():
    """Import the ``webpage_to_markdown`` submodule on demand and return it.

    The import happens inside the function, so the submodule is only loaded
    when this function is called.
    """
    from . import webpage_to_markdown as module

    return module
|
16
|
+
|
9
17
|
|
10
18
|
__all__ = [
|
11
19
|
"html_to_markdown",
|
@@ -14,4 +22,7 @@ __all__ = [
|
|
14
22
|
"get_default_html_to_markdown_options",
|
15
23
|
"pyscripts_to_snippets",
|
16
24
|
"citation_chunker",
|
25
|
+
"init_webpage_to_markdown",
|
26
|
+
"get_youtube_video_subtitle",
|
27
|
+
"get_youtube_video_details",
|
17
28
|
]
|
@@ -0,0 +1,132 @@
|
|
1
|
+
import json
|
2
|
+
import unicodedata
|
3
|
+
import urllib.parse
|
4
|
+
from dataclasses import dataclass
|
5
|
+
from typing import Any, Optional, Self, cast
|
6
|
+
|
7
|
+
import requests
|
8
|
+
|
9
|
+
|
10
|
+
def get_youtube_video_details(
    query: str,
) -> list[dict[str, Optional[str]]]:
    """Search for video metadata on YouTube using the given query. Returns a list of dictionaries containing `video_id`, `title`, `channel`, `duration`, `views`, `publish_time`, and `long_desc`."""
    details: list[dict[str, Optional[str]]] = []
    search_results = YoutubeSearchResult.from_query(base_url="https://youtube.com", query=query, max_results=10)
    for video in search_results:
        video_id = _get_video_id(video.url_suffix)
        # Results whose URL carries no video id are left out.
        if not video_id:
            continue
        details.append(
            {
                "video_id": video_id,
                "title": video.title,
                "channel": video.channel,
                "duration": video.duration,
                "views": video.views,
                "publish_time": video.publish_time,
                "long_desc": video.long_desc,
            }
        )
    return details
|
27
|
+
|
28
|
+
|
29
|
+
def get_youtube_video_subtitle(video_id: str) -> str:
    """Get the transcript of a YouTube video using the given video ID."""
    # The transcript is fetched in the language of the first transcript listed
    # for the video. Entries are grouped into lines of the form
    # "[<start>s] text. text. ...". A new line starts once ten entries are
    # buffered, or when the current or previous entry begins with a non-letter
    # character.

    from youtube_transcript_api._api import YouTubeTranscriptApi

    get_transcript = YouTubeTranscriptApi.get_transcript  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
    list_transcripts = YouTubeTranscriptApi.list_transcripts  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]

    # One separator for every chunk; previously the final chunk was joined
    # with a plain space while all earlier chunks used ". ".
    fragment_separator = ". "

    lines: list[str] = []
    buffer_timestamp: str = "0s"
    buffer_texts: list[str] = []

    language_code = next(iter(list_transcripts(video_id))).language_code  # pyright: ignore[reportUnknownVariableType, reportUnknownMemberType]
    for entry in get_transcript(video_id, languages=(language_code,)):  # pyright: ignore[reportUnknownVariableType]
        entry = cast(dict[object, object], entry)
        text: str = str(entry.get("text", "")).strip().replace("\n", " ")
        if not text:
            continue
        if len(buffer_texts) >= 10 or _is_special_char(text) or (buffer_texts and _is_special_char(buffer_texts[-1])):
            # Skip the flush when nothing is buffered yet (e.g. the very first
            # entry starts with a non-letter) to avoid emitting an empty line.
            if buffer_texts:
                lines.append(f"[{buffer_timestamp}] {fragment_separator.join(buffer_texts)}")
            start = entry.get("start", 0)
            if start:
                buffer_timestamp = f"{start:.0f}s"
            buffer_texts = [text]
        else:
            buffer_texts.append(text)

    if buffer_texts:
        lines.append(f"[{buffer_timestamp}] {fragment_separator.join(buffer_texts)}")
    return "\n".join(lines)
|
57
|
+
|
58
|
+
|
59
|
+
def _get_video_id(suffix: str) -> str:
    """Return the value of the ``v`` query parameter of ``suffix``.

    Returns an empty string when the URL has no ``v`` parameter, so callers
    can filter on truthiness instead of handling a ``KeyError``.
    """
    query = urllib.parse.parse_qs(urllib.parse.urlparse(suffix).query)
    return next(iter(query.get("v", [])), "")
|
61
|
+
|
62
|
+
|
63
|
+
def _is_special_char(text: str) -> bool:
    """Tell whether ``text`` begins with a character that is not a Unicode letter.

    An empty string is never considered special.
    """
    if text:
        # Unicode general categories for letters all start with "L".
        leading_category = unicodedata.category(text[0])
        return leading_category[0] != "L"
    return False
|
67
|
+
|
68
|
+
|
69
|
+
@dataclass
|
70
|
+
class YoutubeSearchResult:
|
71
|
+
url_suffix: str
|
72
|
+
id: Optional[str]
|
73
|
+
thumbnails: list[str]
|
74
|
+
title: Optional[str]
|
75
|
+
long_desc: Optional[str]
|
76
|
+
channel: Optional[str]
|
77
|
+
duration: Optional[str]
|
78
|
+
views: Optional[str]
|
79
|
+
publish_time: Optional[str]
|
80
|
+
|
81
|
+
@classmethod
|
82
|
+
def from_query(cls, base_url: str, query: str, max_results: int) -> list[Self]:
|
83
|
+
url: str = f"{base_url}/results?search_query={urllib.parse.quote_plus(query)}"
|
84
|
+
response: str = requests.get(url).text
|
85
|
+
while "ytInitialData" not in response:
|
86
|
+
response = requests.get(url).text
|
87
|
+
results: list[Self] = cls.parse_html(response)
|
88
|
+
return results[:max_results]
|
89
|
+
|
90
|
+
@classmethod
|
91
|
+
def parse_html(cls, html: str) -> list[Self]:
|
92
|
+
results: list[Self] = []
|
93
|
+
start: int = html.index("ytInitialData") + len("ytInitialData") + 3
|
94
|
+
end: int = html.index("};", start) + 1
|
95
|
+
data: Any = json.loads(html[start:end])
|
96
|
+
for contents in data["contents"]["twoColumnSearchResultsRenderer"]["primaryContents"]["sectionListRenderer"][
|
97
|
+
"contents"
|
98
|
+
]:
|
99
|
+
for video in contents["itemSectionRenderer"]["contents"]:
|
100
|
+
if "videoRenderer" in video.keys():
|
101
|
+
video_data = video.get("videoRenderer", {})
|
102
|
+
suffix = (
|
103
|
+
video_data.get("navigationEndpoint", {})
|
104
|
+
.get("commandMetadata", {})
|
105
|
+
.get("webCommandMetadata", {})
|
106
|
+
.get("url", None)
|
107
|
+
)
|
108
|
+
if not suffix:
|
109
|
+
continue
|
110
|
+
res = cls(
|
111
|
+
id=video_data.get("videoId", None),
|
112
|
+
thumbnails=[
|
113
|
+
thumb.get("url", None) for thumb in video_data.get("thumbnail", {}).get("thumbnails", [{}])
|
114
|
+
],
|
115
|
+
title=video_data.get("title", {}).get("runs", [[{}]])[0].get("text", None),
|
116
|
+
long_desc=video_data.get("descriptionSnippet", {}).get("runs", [{}])[0].get("text", None),
|
117
|
+
channel=video_data.get("longBylineText", {}).get("runs", [[{}]])[0].get("text", None),
|
118
|
+
duration=video_data.get("lengthText", {}).get("simpleText", 0),
|
119
|
+
views=video_data.get("viewCountText", {}).get("simpleText", 0),
|
120
|
+
publish_time=video_data.get("publishedTimeText", {}).get("simpleText", 0),
|
121
|
+
url_suffix=suffix,
|
122
|
+
)
|
123
|
+
results.append(res)
|
124
|
+
|
125
|
+
if results:
|
126
|
+
break
|
127
|
+
return results
|
128
|
+
|
129
|
+
|
130
|
+
if __name__ == "__main__":
|
131
|
+
print(get_youtube_video_details("BTS"))
|
132
|
+
# print(get_youtube_video_subtitle("y7jrpS8GHxs"))
|
@@ -0,0 +1,15 @@
|
|
1
|
+
from .code_agent import (
|
2
|
+
CodeExecutionResult,
|
3
|
+
FunctionSignature,
|
4
|
+
get_default_repl_tool,
|
5
|
+
insert_callables_into_global,
|
6
|
+
)
|
7
|
+
from .image import Base64Image
|
8
|
+
|
9
|
+
__all__ = [
|
10
|
+
"Base64Image",
|
11
|
+
"FunctionSignature",
|
12
|
+
"CodeExecutionResult",
|
13
|
+
"get_default_repl_tool",
|
14
|
+
"insert_callables_into_global",
|
15
|
+
]
|
@@ -0,0 +1,138 @@
|
|
1
|
+
import inspect
|
2
|
+
import textwrap
|
3
|
+
from typing import (
|
4
|
+
TYPE_CHECKING,
|
5
|
+
Callable,
|
6
|
+
Iterable,
|
7
|
+
NamedTuple,
|
8
|
+
Optional,
|
9
|
+
Self,
|
10
|
+
)
|
11
|
+
|
12
|
+
from langchain_core.runnables.config import RunnableConfig
|
13
|
+
|
14
|
+
if TYPE_CHECKING:
|
15
|
+
from langchain_experimental.tools import PythonAstREPLTool
|
16
|
+
|
17
|
+
|
18
|
+
class FunctionSignature(NamedTuple):
|
19
|
+
name: str
|
20
|
+
callable: Callable[..., object]
|
21
|
+
signature: str
|
22
|
+
|
23
|
+
@classmethod
|
24
|
+
def from_callable(cls, callables: Optional[Callable[..., object] | Iterable[Callable[..., object]]]) -> list[Self]:
|
25
|
+
if callables is None:
|
26
|
+
return []
|
27
|
+
if callable(callables):
|
28
|
+
return [cls._from_callable(callables)]
|
29
|
+
return [cls._from_callable(callable) for callable in callables]
|
30
|
+
|
31
|
+
@classmethod
|
32
|
+
def _from_callable(cls, callable: Callable[..., object]) -> Self:
|
33
|
+
"""
|
34
|
+
Get the name and signature of a function as a string.
|
35
|
+
"""
|
36
|
+
# Determine if the function is async
|
37
|
+
is_async_func = inspect.iscoroutinefunction(callable)
|
38
|
+
function_def = "async def" if is_async_func else "def"
|
39
|
+
|
40
|
+
# Determine the function name based on the type of callable
|
41
|
+
if inspect.isfunction(callable):
|
42
|
+
# For regular Python functions, use __code__.co_name
|
43
|
+
function_name = callable.__code__.co_name
|
44
|
+
elif hasattr(callable, "name"):
|
45
|
+
# For StructuredTool or similar objects with a 'name' attribute
|
46
|
+
function_name = callable.name # type: ignore
|
47
|
+
elif hasattr(callable, "__name__"):
|
48
|
+
# For other callables with a __name__ attribute
|
49
|
+
function_name = callable.__name__
|
50
|
+
else:
|
51
|
+
# Fallback to the class name if no name is found
|
52
|
+
function_name = type(callable).__name__
|
53
|
+
|
54
|
+
# Build the signature string
|
55
|
+
signature = f"{function_def} {function_name}{inspect.signature(callable)}:"
|
56
|
+
docstring = inspect.getdoc(callable)
|
57
|
+
if docstring:
|
58
|
+
docstring = f'"""{docstring.strip()}"""'
|
59
|
+
return cls(
|
60
|
+
name=function_name, callable=callable, signature=f"{signature}\n{textwrap.indent(docstring, ' ')}"
|
61
|
+
)
|
62
|
+
else:
|
63
|
+
return cls(name=function_name, callable=callable, signature=signature)
|
64
|
+
|
65
|
+
@classmethod
|
66
|
+
def as_prompt(
|
67
|
+
cls,
|
68
|
+
function_signatures: Iterable[Self],
|
69
|
+
prefix: Optional[str] = "You can use the pre-made functions below without defining them:\n",
|
70
|
+
sep: str = "\n---\n",
|
71
|
+
) -> str:
|
72
|
+
"""
|
73
|
+
Generate a prompt string from a list of callables.
|
74
|
+
"""
|
75
|
+
body: str = sep.join(fsig.signature for fsig in function_signatures)
|
76
|
+
if prefix:
|
77
|
+
return f"{prefix}{body}"
|
78
|
+
return body
|
79
|
+
|
80
|
+
|
81
|
+
class CodeExecutionResult(NamedTuple):
    """Pair of executed source code and the stringified output of running it."""

    code: str
    output: str

    @classmethod
    def from_code(
        cls,
        code: str,
        repl_tool: Optional["PythonAstREPLTool"] = None,
        config: Optional[RunnableConfig] = None,
        function_signatures: Optional[Iterable[FunctionSignature]] = None,
        **kwargs: object,
    ) -> Self:
        """
        Run ``code`` through a REPL tool and capture its output.

        A default REPL tool is created when none is given. When
        ``function_signatures`` is truthy, its callables are inserted into
        the tool's globals before the code is invoked.
        """
        tool = get_default_repl_tool() if repl_tool is None else repl_tool
        if function_signatures:
            insert_callables_into_global(function_signatures=function_signatures, repl_tool=tool)
        raw_output = tool.invoke(code, config=config, **kwargs)  # pyright: ignore[reportUnknownMemberType]
        return cls(code=code, output=str(raw_output))

    @classmethod
    async def afrom_code(
        cls,
        code: str,
        repl_tool: Optional["PythonAstREPLTool"] = None,
        config: Optional[RunnableConfig] = None,
        function_signatures: Optional[Iterable[FunctionSignature]] = None,
        **kwargs: object,
    ) -> Self:
        """
        Asynchronous counterpart of ``from_code``; awaits the tool's ``ainvoke``.
        """
        tool = get_default_repl_tool() if repl_tool is None else repl_tool
        if function_signatures:
            insert_callables_into_global(function_signatures=function_signatures, repl_tool=tool)
        raw_output = await tool.ainvoke(code, config=config, **kwargs)  # pyright: ignore[reportUnknownMemberType]
        return cls(code=code, output=str(raw_output))
|
122
|
+
|
123
|
+
|
124
|
+
def get_default_repl_tool() -> "PythonAstREPLTool":
    """Create a new ``PythonAstREPLTool`` with default settings.

    The import is done lazily so the module can be imported without
    ``langchain-experimental`` installed.

    Raises:
        ImportError: If ``langchain-experimental`` is not installed.
    """
    try:
        from langchain_experimental.tools import PythonAstREPLTool
    except ImportError as exc:
        raise ImportError(
            "Please install `langchain-experimental` with `pip install langchain-experimental` to use this feature."
        ) from exc

    return PythonAstREPLTool()
|
128
|
+
|
129
|
+
|
130
|
+
def insert_callables_into_global(
    function_signatures: Iterable[FunctionSignature], repl_tool: "PythonAstREPLTool"
) -> None:
    """Expose each signature's callable under its name in the REPL tool's globals.

    Args:
        function_signatures: Items providing ``name`` and ``callable`` attributes.
        repl_tool: Tool whose ``globals`` mapping is updated in place, or
            assigned a fresh dict when it is currently ``None``.
    """
    current_globals: Optional[dict[str, object]] = repl_tool.globals  # pyright: ignore[reportUnknownMemberType]
    callables_by_name = {fsig.name: fsig.callable for fsig in function_signatures}
    if current_globals is not None:
        # Merge into the existing mapping so entries already present are kept.
        current_globals.update(callables_by_name)
        return
    repl_tool.globals = callables_by_name
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: chatterer
|
3
|
-
Version: 0.1.8
|
3
|
+
Version: 0.1.10
|
4
4
|
Summary: The highest-level interface for various LLM APIs.
|
5
5
|
Requires-Python: >=3.12
|
6
6
|
Description-Content-Type: text/markdown
|
@@ -18,13 +18,17 @@ Requires-Dist: pillow>=11.1.0; extra == "conversion"
|
|
18
18
|
Requires-Dist: mistune>=3.1.2; extra == "conversion"
|
19
19
|
Requires-Dist: markitdown>=0.0.2; extra == "conversion"
|
20
20
|
Requires-Dist: pymupdf>=1.25.4; extra == "conversion"
|
21
|
+
Requires-Dist: youtube-transcript-api>=1.0.2; extra == "conversion"
|
22
|
+
Provides-Extra: langchain
|
23
|
+
Requires-Dist: chatterer[langchain-providers]; extra == "langchain"
|
24
|
+
Requires-Dist: langchain-experimental>=0.3.4; extra == "langchain"
|
21
25
|
Provides-Extra: langchain-providers
|
22
26
|
Requires-Dist: langchain-openai>=0.3.7; extra == "langchain-providers"
|
23
27
|
Requires-Dist: langchain-anthropic>=0.3.8; extra == "langchain-providers"
|
24
28
|
Requires-Dist: langchain-google-genai>=2.0.10; extra == "langchain-providers"
|
25
29
|
Requires-Dist: langchain-ollama>=0.2.3; extra == "langchain-providers"
|
26
30
|
Provides-Extra: all
|
27
|
-
Requires-Dist: chatterer[langchain
|
31
|
+
Requires-Dist: chatterer[langchain]; extra == "all"
|
28
32
|
Requires-Dist: chatterer[conversion]; extra == "all"
|
29
33
|
Requires-Dist: chatterer[dev]; extra == "all"
|
30
34
|
|
@@ -1,12 +1,13 @@
|
|
1
|
-
chatterer/__init__.py,sha256=
|
2
|
-
chatterer/language_model.py,sha256=
|
3
|
-
chatterer/messages.py,sha256
|
1
|
+
chatterer/__init__.py,sha256=BPgCQ6VWGBXSh8xJr_0bpM0hcOOUz0KoxcKxOd9GYyI,1388
|
2
|
+
chatterer/language_model.py,sha256=qnVC5_W4IYM0y0o1PTYMGXUlblRv5fsRk0zIiL_vT3Q,24491
|
3
|
+
chatterer/messages.py,sha256=OtbZ3two0LUQ4PXES97FDIBUSO3IcMHdFV1VFkDL2mI,229
|
4
4
|
chatterer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
5
5
|
chatterer/strategies/__init__.py,sha256=SdOggbmHpw4f7Njwy-T8q64e91OLOUp1k0a0ozZd4qI,221
|
6
6
|
chatterer/strategies/atom_of_thoughts.py,sha256=CygOCLu5vLk-fzY9O-iE3qLShfjD7iY40ks9jH4ULBM,40872
|
7
7
|
chatterer/strategies/base.py,sha256=b2gMPqodp97OP1dkHfj0UqixjdjVhmTw_V5qJ7i2S6g,427
|
8
|
-
chatterer/tools/__init__.py,sha256=
|
8
|
+
chatterer/tools/__init__.py,sha256=hmWIuLJWotGQodL__i4LLbHdXe7Nl5uKHqNke9tHMro,705
|
9
9
|
chatterer/tools/convert_to_text.py,sha256=kBqxCJ0IoiAw2eiPYqep_SPZm-TtYKF7mdACLsWQUuI,15915
|
10
|
+
chatterer/tools/youtube.py,sha256=5hwASZWA92d_7Y5RqlCK2tULRBQx8bGnE_0NCnvaKi0,5499
|
10
11
|
chatterer/tools/citation_chunking/__init__.py,sha256=gG7Fnkkp28UpcWMbfMY_4gqzZSZ8QzlhalHBoeoq7K0,82
|
11
12
|
chatterer/tools/citation_chunking/chunks.py,sha256=50Dpa43RaYftlNox8tM1qI8htZ3_AJ9Uyyn02WsmxYk,2173
|
12
13
|
chatterer/tools/citation_chunking/citation_chunker.py,sha256=yx5O9pUkowlNcFyyNf7f3sbq7-CV8AXOzFnviDldPR8,4894
|
@@ -17,8 +18,10 @@ chatterer/tools/citation_chunking/utils.py,sha256=M4pH2-UIE1VLzQLXDqjEe4L3Xcy0e0
|
|
17
18
|
chatterer/tools/webpage_to_markdown/__init__.py,sha256=bHH4qfnXyw8Zz-yBPLaTezF1sh9njvNBJmhBVtcpjsA,123
|
18
19
|
chatterer/tools/webpage_to_markdown/playwright_bot.py,sha256=yP0KixYZNQ4Kn_ZCFDI3mVyBD_DpUGfqgklpaGJUTCU,27496
|
19
20
|
chatterer/tools/webpage_to_markdown/utils.py,sha256=ZLUU94imYciEdynD2K7Dmcsbt8BVQTaOP56Ba6DAFvk,12593
|
21
|
+
chatterer/utils/__init__.py,sha256=8nzpFJKU_wSRPH6LBP6HRBotPMrSl_VO9UlmFprTrK0,334
|
22
|
+
chatterer/utils/code_agent.py,sha256=UaWdeGzJMPzRSFy9yrxuveBJsvOPSa0te6OuE18bees,5143
|
20
23
|
chatterer/utils/image.py,sha256=F3_D1677UDFlgp-UQBS_ChkNODzf_VOfjYNSUi02MaI,10852
|
21
|
-
chatterer-0.1.
|
22
|
-
chatterer-0.1.
|
23
|
-
chatterer-0.1.
|
24
|
-
chatterer-0.1.
|
24
|
+
chatterer-0.1.10.dist-info/METADATA,sha256=qPx7b41yUvBG0XFH4ra89LIyOSUZUo_8gZ-adVkTKME,4458
|
25
|
+
chatterer-0.1.10.dist-info/WHEEL,sha256=1tXe9gY0PYatrMPMDd6jXqjfpz_B-Wqm32CPfRC58XU,91
|
26
|
+
chatterer-0.1.10.dist-info/top_level.txt,sha256=7nSQKP0bHxPRc7HyzdbKsJdkvPgYD0214o6slRizv9s,10
|
27
|
+
chatterer-0.1.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|