chatterer 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- chatterer/__init__.py +93 -93
- chatterer/common_types/__init__.py +21 -21
- chatterer/common_types/io.py +19 -19
- chatterer/examples/__init__.py +0 -0
- chatterer/examples/anything_to_markdown.py +95 -91
- chatterer/examples/get_code_snippets.py +64 -62
- chatterer/examples/login_with_playwright.py +171 -167
- chatterer/examples/make_ppt.py +499 -497
- chatterer/examples/pdf_to_markdown.py +107 -107
- chatterer/examples/pdf_to_text.py +60 -56
- chatterer/examples/transcription_api.py +127 -123
- chatterer/examples/upstage_parser.py +95 -100
- chatterer/examples/webpage_to_markdown.py +79 -79
- chatterer/interactive.py +354 -354
- chatterer/language_model.py +533 -533
- chatterer/messages.py +21 -21
- chatterer/strategies/__init__.py +13 -13
- chatterer/strategies/atom_of_thoughts.py +975 -975
- chatterer/strategies/base.py +14 -14
- chatterer/tools/__init__.py +46 -46
- chatterer/tools/caption_markdown_images.py +384 -384
- chatterer/tools/citation_chunking/__init__.py +3 -3
- chatterer/tools/citation_chunking/chunks.py +53 -53
- chatterer/tools/citation_chunking/citation_chunker.py +118 -118
- chatterer/tools/citation_chunking/citations.py +285 -285
- chatterer/tools/citation_chunking/prompt.py +157 -157
- chatterer/tools/citation_chunking/reference.py +26 -26
- chatterer/tools/citation_chunking/utils.py +138 -138
- chatterer/tools/convert_pdf_to_markdown.py +302 -302
- chatterer/tools/convert_to_text.py +447 -447
- chatterer/tools/upstage_document_parser.py +705 -705
- chatterer/tools/webpage_to_markdown.py +739 -739
- chatterer/tools/youtube.py +146 -146
- chatterer/utils/__init__.py +15 -15
- chatterer/utils/base64_image.py +285 -285
- chatterer/utils/bytesio.py +59 -59
- chatterer/utils/code_agent.py +237 -237
- chatterer/utils/imghdr.py +148 -148
- {chatterer-0.1.18.dist-info → chatterer-0.1.19.dist-info}/METADATA +392 -392
- chatterer-0.1.19.dist-info/RECORD +44 -0
- {chatterer-0.1.18.dist-info → chatterer-0.1.19.dist-info}/WHEEL +1 -1
- chatterer-0.1.19.dist-info/entry_points.txt +10 -0
- chatterer-0.1.18.dist-info/RECORD +0 -42
- {chatterer-0.1.18.dist-info → chatterer-0.1.19.dist-info}/top_level.txt +0 -0
chatterer/__init__.py
CHANGED
@@ -1,93 +1,93 @@
|
|
1
|
-
from .interactive import interactive_shell
|
2
|
-
from .language_model import Chatterer
|
3
|
-
from .messages import (
|
4
|
-
AIMessage,
|
5
|
-
BaseMessage,
|
6
|
-
BaseMessageChunk,
|
7
|
-
FunctionMessage,
|
8
|
-
HumanMessage,
|
9
|
-
LanguageModelInput,
|
10
|
-
SystemMessage,
|
11
|
-
UsageMetadata,
|
12
|
-
)
|
13
|
-
from .strategies import (
|
14
|
-
AoTPipeline,
|
15
|
-
AoTPrompter,
|
16
|
-
AoTStrategy,
|
17
|
-
BaseStrategy,
|
18
|
-
)
|
19
|
-
from .tools import (
|
20
|
-
CodeSnippets,
|
21
|
-
MarkdownLink,
|
22
|
-
PdfToMarkdown,
|
23
|
-
PlayWrightBot,
|
24
|
-
PlaywrightLaunchOptions,
|
25
|
-
PlaywrightOptions,
|
26
|
-
PlaywrightPersistencyOptions,
|
27
|
-
UpstageDocumentParseParser,
|
28
|
-
acaption_markdown_images,
|
29
|
-
anything_to_markdown,
|
30
|
-
caption_markdown_images,
|
31
|
-
citation_chunker,
|
32
|
-
extract_text_from_pdf,
|
33
|
-
get_default_html_to_markdown_options,
|
34
|
-
get_default_playwright_launch_options,
|
35
|
-
get_youtube_video_details,
|
36
|
-
get_youtube_video_subtitle,
|
37
|
-
html_to_markdown,
|
38
|
-
open_pdf,
|
39
|
-
pdf_to_text,
|
40
|
-
pyscripts_to_snippets,
|
41
|
-
render_pdf_as_image,
|
42
|
-
)
|
43
|
-
from .utils import (
|
44
|
-
Base64Image,
|
45
|
-
CodeExecutionResult,
|
46
|
-
FunctionSignature,
|
47
|
-
get_default_repl_tool,
|
48
|
-
insert_callables_into_global,
|
49
|
-
)
|
50
|
-
|
51
|
-
__all__ = [
|
52
|
-
"BaseStrategy",
|
53
|
-
"Chatterer",
|
54
|
-
"AoTStrategy",
|
55
|
-
"AoTPipeline",
|
56
|
-
"AoTPrompter",
|
57
|
-
"html_to_markdown",
|
58
|
-
"anything_to_markdown",
|
59
|
-
"pdf_to_text",
|
60
|
-
"get_default_html_to_markdown_options",
|
61
|
-
"pyscripts_to_snippets",
|
62
|
-
"citation_chunker",
|
63
|
-
"BaseMessage",
|
64
|
-
"HumanMessage",
|
65
|
-
"SystemMessage",
|
66
|
-
"AIMessage",
|
67
|
-
"FunctionMessage",
|
68
|
-
"Base64Image",
|
69
|
-
"FunctionSignature",
|
70
|
-
"CodeExecutionResult",
|
71
|
-
"get_default_repl_tool",
|
72
|
-
"insert_callables_into_global",
|
73
|
-
"get_youtube_video_subtitle",
|
74
|
-
"get_youtube_video_details",
|
75
|
-
"interactive_shell",
|
76
|
-
"UpstageDocumentParseParser",
|
77
|
-
"BaseMessageChunk",
|
78
|
-
"CodeSnippets",
|
79
|
-
"LanguageModelInput",
|
80
|
-
"UsageMetadata",
|
81
|
-
"PlayWrightBot",
|
82
|
-
"PlaywrightLaunchOptions",
|
83
|
-
"PlaywrightOptions",
|
84
|
-
"PlaywrightPersistencyOptions",
|
85
|
-
"get_default_playwright_launch_options",
|
86
|
-
"acaption_markdown_images",
|
87
|
-
"caption_markdown_images",
|
88
|
-
"MarkdownLink",
|
89
|
-
"PdfToMarkdown",
|
90
|
-
"extract_text_from_pdf",
|
91
|
-
"open_pdf",
|
92
|
-
"render_pdf_as_image",
|
93
|
-
]
|
1
|
+
from .interactive import interactive_shell
|
2
|
+
from .language_model import Chatterer
|
3
|
+
from .messages import (
|
4
|
+
AIMessage,
|
5
|
+
BaseMessage,
|
6
|
+
BaseMessageChunk,
|
7
|
+
FunctionMessage,
|
8
|
+
HumanMessage,
|
9
|
+
LanguageModelInput,
|
10
|
+
SystemMessage,
|
11
|
+
UsageMetadata,
|
12
|
+
)
|
13
|
+
from .strategies import (
|
14
|
+
AoTPipeline,
|
15
|
+
AoTPrompter,
|
16
|
+
AoTStrategy,
|
17
|
+
BaseStrategy,
|
18
|
+
)
|
19
|
+
from .tools import (
|
20
|
+
CodeSnippets,
|
21
|
+
MarkdownLink,
|
22
|
+
PdfToMarkdown,
|
23
|
+
PlayWrightBot,
|
24
|
+
PlaywrightLaunchOptions,
|
25
|
+
PlaywrightOptions,
|
26
|
+
PlaywrightPersistencyOptions,
|
27
|
+
UpstageDocumentParseParser,
|
28
|
+
acaption_markdown_images,
|
29
|
+
anything_to_markdown,
|
30
|
+
caption_markdown_images,
|
31
|
+
citation_chunker,
|
32
|
+
extract_text_from_pdf,
|
33
|
+
get_default_html_to_markdown_options,
|
34
|
+
get_default_playwright_launch_options,
|
35
|
+
get_youtube_video_details,
|
36
|
+
get_youtube_video_subtitle,
|
37
|
+
html_to_markdown,
|
38
|
+
open_pdf,
|
39
|
+
pdf_to_text,
|
40
|
+
pyscripts_to_snippets,
|
41
|
+
render_pdf_as_image,
|
42
|
+
)
|
43
|
+
from .utils import (
|
44
|
+
Base64Image,
|
45
|
+
CodeExecutionResult,
|
46
|
+
FunctionSignature,
|
47
|
+
get_default_repl_tool,
|
48
|
+
insert_callables_into_global,
|
49
|
+
)
|
50
|
+
|
51
|
+
__all__ = [
|
52
|
+
"BaseStrategy",
|
53
|
+
"Chatterer",
|
54
|
+
"AoTStrategy",
|
55
|
+
"AoTPipeline",
|
56
|
+
"AoTPrompter",
|
57
|
+
"html_to_markdown",
|
58
|
+
"anything_to_markdown",
|
59
|
+
"pdf_to_text",
|
60
|
+
"get_default_html_to_markdown_options",
|
61
|
+
"pyscripts_to_snippets",
|
62
|
+
"citation_chunker",
|
63
|
+
"BaseMessage",
|
64
|
+
"HumanMessage",
|
65
|
+
"SystemMessage",
|
66
|
+
"AIMessage",
|
67
|
+
"FunctionMessage",
|
68
|
+
"Base64Image",
|
69
|
+
"FunctionSignature",
|
70
|
+
"CodeExecutionResult",
|
71
|
+
"get_default_repl_tool",
|
72
|
+
"insert_callables_into_global",
|
73
|
+
"get_youtube_video_subtitle",
|
74
|
+
"get_youtube_video_details",
|
75
|
+
"interactive_shell",
|
76
|
+
"UpstageDocumentParseParser",
|
77
|
+
"BaseMessageChunk",
|
78
|
+
"CodeSnippets",
|
79
|
+
"LanguageModelInput",
|
80
|
+
"UsageMetadata",
|
81
|
+
"PlayWrightBot",
|
82
|
+
"PlaywrightLaunchOptions",
|
83
|
+
"PlaywrightOptions",
|
84
|
+
"PlaywrightPersistencyOptions",
|
85
|
+
"get_default_playwright_launch_options",
|
86
|
+
"acaption_markdown_images",
|
87
|
+
"caption_markdown_images",
|
88
|
+
"MarkdownLink",
|
89
|
+
"PdfToMarkdown",
|
90
|
+
"extract_text_from_pdf",
|
91
|
+
"open_pdf",
|
92
|
+
"render_pdf_as_image",
|
93
|
+
]
|
@@ -1,21 +1,21 @@
|
|
1
|
-
from .io import (
|
2
|
-
BytesReadable,
|
3
|
-
BytesWritable,
|
4
|
-
FileDescriptorOrPath,
|
5
|
-
PathOrReadable,
|
6
|
-
Readable,
|
7
|
-
StringReadable,
|
8
|
-
StringWritable,
|
9
|
-
Writable,
|
10
|
-
)
|
11
|
-
|
12
|
-
__all__ = [
|
13
|
-
"BytesReadable",
|
14
|
-
"BytesWritable",
|
15
|
-
"FileDescriptorOrPath",
|
16
|
-
"PathOrReadable",
|
17
|
-
"Readable",
|
18
|
-
"StringReadable",
|
19
|
-
"StringWritable",
|
20
|
-
"Writable",
|
21
|
-
]
|
1
|
+
from .io import (
|
2
|
+
BytesReadable,
|
3
|
+
BytesWritable,
|
4
|
+
FileDescriptorOrPath,
|
5
|
+
PathOrReadable,
|
6
|
+
Readable,
|
7
|
+
StringReadable,
|
8
|
+
StringWritable,
|
9
|
+
Writable,
|
10
|
+
)
|
11
|
+
|
12
|
+
__all__ = [
|
13
|
+
"BytesReadable",
|
14
|
+
"BytesWritable",
|
15
|
+
"FileDescriptorOrPath",
|
16
|
+
"PathOrReadable",
|
17
|
+
"Readable",
|
18
|
+
"StringReadable",
|
19
|
+
"StringWritable",
|
20
|
+
"Writable",
|
21
|
+
]
|
chatterer/common_types/io.py
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
import os
|
2
|
-
from io import BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOWrapper
|
3
|
-
from typing import TypeAlias
|
4
|
-
|
5
|
-
# Type aliases for callback functions and file descriptors
|
6
|
-
FileDescriptorOrPath: TypeAlias = int | str | bytes | os.PathLike[str] | os.PathLike[bytes]
|
7
|
-
|
8
|
-
# Type aliases for different types of IO objects
|
9
|
-
BytesReadable: TypeAlias = BytesIO | BufferedReader
|
10
|
-
BytesWritable: TypeAlias = BytesIO | BufferedWriter
|
11
|
-
StringReadable: TypeAlias = StringIO | TextIOWrapper
|
12
|
-
StringWritable: TypeAlias = StringIO | TextIOWrapper
|
13
|
-
|
14
|
-
# Combined type aliases for readable and writable objects
|
15
|
-
Readable: TypeAlias = BytesReadable | StringReadable
|
16
|
-
Writable: TypeAlias = BytesWritable | StringWritable
|
17
|
-
|
18
|
-
# Type alias for path or readable object
|
19
|
-
PathOrReadable: TypeAlias = FileDescriptorOrPath | Readable
|
1
|
+
import os
|
2
|
+
from io import BufferedReader, BufferedWriter, BytesIO, StringIO, TextIOWrapper
|
3
|
+
from typing import TypeAlias
|
4
|
+
|
5
|
+
# Type aliases for callback functions and file descriptors
|
6
|
+
FileDescriptorOrPath: TypeAlias = int | str | bytes | os.PathLike[str] | os.PathLike[bytes]
|
7
|
+
|
8
|
+
# Type aliases for different types of IO objects
|
9
|
+
BytesReadable: TypeAlias = BytesIO | BufferedReader
|
10
|
+
BytesWritable: TypeAlias = BytesIO | BufferedWriter
|
11
|
+
StringReadable: TypeAlias = StringIO | TextIOWrapper
|
12
|
+
StringWritable: TypeAlias = StringIO | TextIOWrapper
|
13
|
+
|
14
|
+
# Combined type aliases for readable and writable objects
|
15
|
+
Readable: TypeAlias = BytesReadable | StringReadable
|
16
|
+
Writable: TypeAlias = BytesWritable | StringWritable
|
17
|
+
|
18
|
+
# Type alias for path or readable object
|
19
|
+
PathOrReadable: TypeAlias = FileDescriptorOrPath | Readable
|
File without changes
|
@@ -1,91 +1,95 @@
|
|
1
|
-
def resolve_import_path_and_get_logger():
|
2
|
-
# ruff: noqa: E402
|
3
|
-
import logging
|
4
|
-
import sys
|
5
|
-
|
6
|
-
if __name__ == "__main__" and "." not in sys.path:
|
7
|
-
sys.path.append(".")
|
8
|
-
|
9
|
-
logger = logging.getLogger(__name__)
|
10
|
-
return logger
|
11
|
-
|
12
|
-
|
13
|
-
logger = resolve_import_path_and_get_logger()
|
14
|
-
from pathlib import Path
|
15
|
-
from typing import Optional, TypedDict
|
16
|
-
|
17
|
-
import openai
|
18
|
-
from spargear import ArgumentSpec, BaseArguments
|
19
|
-
|
20
|
-
from chatterer import anything_to_markdown
|
21
|
-
|
22
|
-
|
23
|
-
class AnythingToMarkdownReturns(TypedDict):
|
24
|
-
in_path: str
|
25
|
-
out_path: Optional[str]
|
26
|
-
out_text: str
|
27
|
-
|
28
|
-
|
29
|
-
class AnythingToMarkdownArguments(BaseArguments):
|
30
|
-
"""Command line arguments for converting various file types to markdown."""
|
31
|
-
|
32
|
-
in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Input file to convert to markdown")
|
33
|
-
out_path: Optional[str] = None
|
34
|
-
"""Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
|
35
|
-
model: Optional[str] = None
|
36
|
-
"""OpenAI Model to use for conversion"""
|
37
|
-
api_key: Optional[str] = None
|
38
|
-
"""API key for OpenAI API"""
|
39
|
-
base_url: Optional[str] = None
|
40
|
-
"""Base URL for OpenAI API"""
|
41
|
-
style_map: Optional[str] = None
|
42
|
-
"""Output style map"""
|
43
|
-
exiftool_path: Optional[str] = None
|
44
|
-
""""Path to exiftool for metadata extraction"""
|
45
|
-
docintel_endpoint: Optional[str] = None
|
46
|
-
"Document Intelligence API endpoint"
|
47
|
-
prevent_save_file: bool = False
|
48
|
-
"""Prevent saving the converted file to disk."""
|
49
|
-
encoding: str = "utf-8"
|
50
|
-
"""Encoding for the output file."""
|
51
|
-
|
52
|
-
def run(self) -> AnythingToMarkdownReturns:
|
53
|
-
in_path = self.in_path.unwrap()
|
54
|
-
if not self.prevent_save_file:
|
55
|
-
if not self.out_path:
|
56
|
-
out_path = Path(in_path).with_suffix(".md")
|
57
|
-
else:
|
58
|
-
out_path = Path(self.out_path)
|
59
|
-
else:
|
60
|
-
out_path = None
|
61
|
-
|
62
|
-
if self.model:
|
63
|
-
llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
|
64
|
-
llm_model = self.model
|
65
|
-
else:
|
66
|
-
llm_client = None
|
67
|
-
llm_model = None
|
68
|
-
|
69
|
-
text: str = anything_to_markdown(
|
70
|
-
in_path,
|
71
|
-
llm_client=llm_client,
|
72
|
-
llm_model=llm_model,
|
73
|
-
style_map=self.style_map,
|
74
|
-
exiftool_path=self.exiftool_path,
|
75
|
-
docintel_endpoint=self.docintel_endpoint,
|
76
|
-
)
|
77
|
-
if out_path:
|
78
|
-
out_path.parent.mkdir(parents=True, exist_ok=True)
|
79
|
-
out_path.write_text(text, encoding=self.encoding)
|
80
|
-
logger.info(f"Converted `{in_path}` to markdown and saved to `{out_path}`.")
|
81
|
-
else:
|
82
|
-
logger.info(f"Converted `{in_path}` to markdown.")
|
83
|
-
return {
|
84
|
-
"in_path": in_path,
|
85
|
-
"out_path": str(out_path) if out_path is not None else None,
|
86
|
-
"out_text": text,
|
87
|
-
}
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
AnythingToMarkdownArguments().run()
|
1
|
+
def resolve_import_path_and_get_logger():
|
2
|
+
# ruff: noqa: E402
|
3
|
+
import logging
|
4
|
+
import sys
|
5
|
+
|
6
|
+
if __name__ == "__main__" and "." not in sys.path:
|
7
|
+
sys.path.append(".")
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
return logger
|
11
|
+
|
12
|
+
|
13
|
+
logger = resolve_import_path_and_get_logger()
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import Optional, TypedDict
|
16
|
+
|
17
|
+
import openai
|
18
|
+
from spargear import ArgumentSpec, BaseArguments
|
19
|
+
|
20
|
+
from chatterer import anything_to_markdown
|
21
|
+
|
22
|
+
|
23
|
+
class AnythingToMarkdownReturns(TypedDict):
|
24
|
+
in_path: str
|
25
|
+
out_path: Optional[str]
|
26
|
+
out_text: str
|
27
|
+
|
28
|
+
|
29
|
+
class AnythingToMarkdownArguments(BaseArguments):
|
30
|
+
"""Command line arguments for converting various file types to markdown."""
|
31
|
+
|
32
|
+
in_path: ArgumentSpec[str] = ArgumentSpec(["in-path"], help="Input file to convert to markdown")
|
33
|
+
out_path: Optional[str] = None
|
34
|
+
"""Output path for the converted markdown file. If not provided, the input file's suffix is replaced with .md"""
|
35
|
+
model: Optional[str] = None
|
36
|
+
"""OpenAI Model to use for conversion"""
|
37
|
+
api_key: Optional[str] = None
|
38
|
+
"""API key for OpenAI API"""
|
39
|
+
base_url: Optional[str] = None
|
40
|
+
"""Base URL for OpenAI API"""
|
41
|
+
style_map: Optional[str] = None
|
42
|
+
"""Output style map"""
|
43
|
+
exiftool_path: Optional[str] = None
|
44
|
+
""""Path to exiftool for metadata extraction"""
|
45
|
+
docintel_endpoint: Optional[str] = None
|
46
|
+
"Document Intelligence API endpoint"
|
47
|
+
prevent_save_file: bool = False
|
48
|
+
"""Prevent saving the converted file to disk."""
|
49
|
+
encoding: str = "utf-8"
|
50
|
+
"""Encoding for the output file."""
|
51
|
+
|
52
|
+
def run(self) -> AnythingToMarkdownReturns:
|
53
|
+
in_path = self.in_path.unwrap()
|
54
|
+
if not self.prevent_save_file:
|
55
|
+
if not self.out_path:
|
56
|
+
out_path = Path(in_path).with_suffix(".md")
|
57
|
+
else:
|
58
|
+
out_path = Path(self.out_path)
|
59
|
+
else:
|
60
|
+
out_path = None
|
61
|
+
|
62
|
+
if self.model:
|
63
|
+
llm_client = openai.OpenAI(api_key=self.api_key, base_url=self.base_url)
|
64
|
+
llm_model = self.model
|
65
|
+
else:
|
66
|
+
llm_client = None
|
67
|
+
llm_model = None
|
68
|
+
|
69
|
+
text: str = anything_to_markdown(
|
70
|
+
in_path,
|
71
|
+
llm_client=llm_client,
|
72
|
+
llm_model=llm_model,
|
73
|
+
style_map=self.style_map,
|
74
|
+
exiftool_path=self.exiftool_path,
|
75
|
+
docintel_endpoint=self.docintel_endpoint,
|
76
|
+
)
|
77
|
+
if out_path:
|
78
|
+
out_path.parent.mkdir(parents=True, exist_ok=True)
|
79
|
+
out_path.write_text(text, encoding=self.encoding)
|
80
|
+
logger.info(f"Converted `{in_path}` to markdown and saved to `{out_path}`.")
|
81
|
+
else:
|
82
|
+
logger.info(f"Converted `{in_path}` to markdown.")
|
83
|
+
return {
|
84
|
+
"in_path": in_path,
|
85
|
+
"out_path": str(out_path) if out_path is not None else None,
|
86
|
+
"out_text": text,
|
87
|
+
}
|
88
|
+
|
89
|
+
|
90
|
+
def main() -> None:
|
91
|
+
AnythingToMarkdownArguments().run()
|
92
|
+
|
93
|
+
|
94
|
+
if __name__ == "__main__":
|
95
|
+
main()
|
@@ -1,62 +1,64 @@
|
|
1
|
-
def resolve_import_path_and_get_logger():
|
2
|
-
# ruff: noqa: E402
|
3
|
-
import logging
|
4
|
-
import sys
|
5
|
-
|
6
|
-
if __name__ == "__main__" and "." not in sys.path:
|
7
|
-
sys.path.append(".")
|
8
|
-
|
9
|
-
logger = logging.getLogger(__name__)
|
10
|
-
return logger
|
11
|
-
|
12
|
-
|
13
|
-
logger = resolve_import_path_and_get_logger()
|
14
|
-
from pathlib import Path
|
15
|
-
from typing import Optional
|
16
|
-
|
17
|
-
from spargear import ArgumentSpec, BaseArguments
|
18
|
-
|
19
|
-
from chatterer import CodeSnippets
|
20
|
-
|
21
|
-
|
22
|
-
class GetCodeSnippetsArgs(BaseArguments):
|
23
|
-
path_or_pkgname: ArgumentSpec[str] = ArgumentSpec(
|
24
|
-
|
25
|
-
)
|
26
|
-
|
27
|
-
|
28
|
-
"""List of
|
29
|
-
|
30
|
-
"""
|
31
|
-
|
32
|
-
"""
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
out_path = Path(
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
logger.info(f"Extracted code snippets from `{path_or_pkgname}
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
1
|
+
def resolve_import_path_and_get_logger():
|
2
|
+
# ruff: noqa: E402
|
3
|
+
import logging
|
4
|
+
import sys
|
5
|
+
|
6
|
+
if __name__ == "__main__" and "." not in sys.path:
|
7
|
+
sys.path.append(".")
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
return logger
|
11
|
+
|
12
|
+
|
13
|
+
logger = resolve_import_path_and_get_logger()
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import Optional
|
16
|
+
|
17
|
+
from spargear import ArgumentSpec, BaseArguments
|
18
|
+
|
19
|
+
from chatterer import CodeSnippets
|
20
|
+
|
21
|
+
|
22
|
+
class GetCodeSnippetsArgs(BaseArguments):
|
23
|
+
path_or_pkgname: ArgumentSpec[str] = ArgumentSpec(["path_or_pkgname"], help="Path to the package or file from which to extract code snippets.")
|
24
|
+
out_path: Optional[str] = None
|
25
|
+
ban_file_patterns: list[str] = [".venv/*", Path(__file__).relative_to(Path.cwd()).as_posix()]
|
26
|
+
"""List of file patterns to ignore."""
|
27
|
+
glob_patterns: list[str] = ["*.py"]
|
28
|
+
"""List of glob patterns to include."""
|
29
|
+
case_sensitive: bool = False
|
30
|
+
"""Enable case-sensitive matching for glob patterns."""
|
31
|
+
prevent_save_file: bool = False
|
32
|
+
"""Prevent saving the extracted code snippets to a file."""
|
33
|
+
|
34
|
+
def run(self) -> CodeSnippets:
|
35
|
+
path_or_pkgname = self.path_or_pkgname.unwrap()
|
36
|
+
if not self.prevent_save_file:
|
37
|
+
if not self.out_path:
|
38
|
+
out_path = Path(__file__).with_suffix(".txt")
|
39
|
+
else:
|
40
|
+
out_path = Path(self.out_path)
|
41
|
+
else:
|
42
|
+
out_path = None
|
43
|
+
|
44
|
+
cs = CodeSnippets.from_path_or_pkgname(
|
45
|
+
path_or_pkgname=path_or_pkgname,
|
46
|
+
ban_file_patterns=self.ban_file_patterns,
|
47
|
+
glob_patterns=self.glob_patterns,
|
48
|
+
case_sensitive=self.case_sensitive,
|
49
|
+
)
|
50
|
+
if out_path is not None:
|
51
|
+
out_path.parent.mkdir(parents=True, exist_ok=True)
|
52
|
+
out_path.write_text(cs.snippets_text, encoding="utf-8")
|
53
|
+
logger.info(f"Extracted code snippets from `{path_or_pkgname}` and saved to `{out_path}`.")
|
54
|
+
else:
|
55
|
+
logger.info(f"Extracted code snippets from `{path_or_pkgname}`.")
|
56
|
+
return cs
|
57
|
+
|
58
|
+
|
59
|
+
def main() -> None:
|
60
|
+
GetCodeSnippetsArgs().run()
|
61
|
+
|
62
|
+
|
63
|
+
if __name__ == "__main__":
|
64
|
+
main()
|