awschain 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awschain-0.1.0/LICENSE +21 -0
- awschain-0.1.0/MANIFEST.in +9 -0
- awschain-0.1.0/PKG-INFO +128 -0
- awschain-0.1.0/README.md +108 -0
- awschain-0.1.0/pyproject.toml +3 -0
- awschain-0.1.0/requirements.txt +3 -0
- awschain-0.1.0/setup.cfg +4 -0
- awschain-0.1.0/setup.py +39 -0
- awschain-0.1.0/src/awschain/__init__.py +4 -0
- awschain-0.1.0/src/awschain/example.py +240 -0
- awschain-0.1.0/src/awschain/handlers/__init__.py +5 -0
- awschain-0.1.0/src/awschain/handlers/abstract_handler.py +26 -0
- awschain-0.1.0/src/awschain/handlers/base_handler.py +17 -0
- awschain-0.1.0/src/awschain/handlers/handler_factory.py +57 -0
- awschain-0.1.0/src/awschain/handlers/misc/__init__.py +0 -0
- awschain-0.1.0/src/awschain/handlers/misc/clipboard_writer_handler.py +8 -0
- awschain-0.1.0/src/awschain/handlers/misc/print_context_handler.py +14 -0
- awschain-0.1.0/src/awschain/handlers/misc/remote_file_downloader_handler.py +43 -0
- awschain-0.1.0/src/awschain/handlers/processors/__init__.py +0 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_bedrock_chat_handler.py +32 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_bedrock_handler.py +62 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_insights_handler.py +95 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_pii_classifier_handler.py +42 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_pii_handler.py +48 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_pii_tokenize_handler.py +119 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_pii_untokenize_handler.py +46 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_rekognition_handler.py +66 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_textract_handler.py +138 -0
- awschain-0.1.0/src/awschain/handlers/processors/amazon_transcribe_handler.py +91 -0
- awschain-0.1.0/src/awschain/handlers/processors/anonymize_handler.py +30 -0
- awschain-0.1.0/src/awschain/handlers/processors/html_cleaner_handler.py +11 -0
- awschain-0.1.0/src/awschain/handlers/processors/prompt_handler.py +27 -0
- awschain-0.1.0/src/awschain/handlers/readers/__init__.py +0 -0
- awschain-0.1.0/src/awschain/handlers/readers/amazon_s3_reader_handler.py +35 -0
- awschain-0.1.0/src/awschain/handlers/readers/aws_secrets_manager_secret_reader.py +50 -0
- awschain-0.1.0/src/awschain/handlers/readers/email_reader_handler.py +83 -0
- awschain-0.1.0/src/awschain/handlers/readers/http_handler.py +140 -0
- awschain-0.1.0/src/awschain/handlers/readers/local_file_reader_handler.py +22 -0
- awschain-0.1.0/src/awschain/handlers/readers/microsoft_excel_reader_handler.py +84 -0
- awschain-0.1.0/src/awschain/handlers/readers/microsoft_power_point_reader_handler.py +95 -0
- awschain-0.1.0/src/awschain/handlers/readers/microsoft_word_handler.py +112 -0
- awschain-0.1.0/src/awschain/handlers/readers/pdf_reader_handler.py +81 -0
- awschain-0.1.0/src/awschain/handlers/readers/quip_reader_handler.py +148 -0
- awschain-0.1.0/src/awschain/handlers/readers/web_crawler_reader_handler.py +52 -0
- awschain-0.1.0/src/awschain/handlers/readers/youtube_reader_handler.py +39 -0
- awschain-0.1.0/src/awschain/handlers/writers/__init__.py +0 -0
- awschain-0.1.0/src/awschain/handlers/writers/amazon_datazone_asset_writer_handler.py +87 -0
- awschain-0.1.0/src/awschain/handlers/writers/amazon_datazone_glossary_writer_handler.py +92 -0
- awschain-0.1.0/src/awschain/handlers/writers/amazon_s3_writer_handler.py +118 -0
- awschain-0.1.0/src/awschain/handlers/writers/email_sender_handler.py +56 -0
- awschain-0.1.0/src/awschain/handlers/writers/local_file_writer_handler.py +51 -0
- awschain-0.1.0/src/awschain/handlers/writers/quip_writer_handler.py +85 -0
- awschain-0.1.0/src/awschain/utils/__init__.py +2 -0
- awschain-0.1.0/src/awschain/utils/aws_boto_client_manager.py +16 -0
- awschain-0.1.0/src/awschain/utils/bedrock.py +70 -0
- awschain-0.1.0/src/awschain/utils/config.py +1 -0
- awschain-0.1.0/src/awschain/utils/config_loader.py +43 -0
- awschain-0.1.0/src/awschain/utils/web_utils.py +22 -0
- awschain-0.1.0/src/awschain.egg-info/PKG-INFO +128 -0
- awschain-0.1.0/src/awschain.egg-info/SOURCES.txt +63 -0
- awschain-0.1.0/src/awschain.egg-info/dependency_links.txt +1 -0
- awschain-0.1.0/src/awschain.egg-info/not-zip-safe +1 -0
- awschain-0.1.0/src/awschain.egg-info/requires.txt +3 -0
- awschain-0.1.0/src/awschain.egg-info/top_level.txt +1 -0
- awschain-0.1.0/tests/test_misc_handlers.py +43 -0
awschain-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Kamen Sharlandjiev
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include requirements.txt
|
|
3
|
+
recursive-include src *.py
|
|
4
|
+
recursive-include src/awschain *.py
|
|
5
|
+
recursive-include src/awschain/handlers *.py
|
|
6
|
+
global-exclude __pycache__
|
|
7
|
+
global-exclude *.py[cod]
|
|
8
|
+
include src/awschain/__init__.py
|
|
9
|
+
include src/awschain/handlers/__init__.py
|
awschain-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: awschain
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A framework for chaining AWS services using the chain of responsibility pattern
|
|
5
|
+
Home-page: https://github.com/ksharlandjiev/awschain
|
|
6
|
+
Author: Kamen Sharlandjiev
|
|
7
|
+
Author-email: ksharlandjiev@gmail.com
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Requires-Python: >=3.7
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: requests
|
|
18
|
+
Requires-Dist: boto3
|
|
19
|
+
Requires-Dist: python-dotenv
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# awschain
|
|
23
|
+
|
|
24
|
+
`awschain` is a Python package that provides a flexible and extensible implementation of the **Chain of Responsibility** design pattern. It allows users to chain together multiple processing steps in a sequence of handlers, making it easier to create dynamic and modular processing pipelines. This package is ideal for scenarios where various operations need to be applied in sequence, such as file processing, API interactions, and data transformations.
|
|
25
|
+
|
|
26
|
+
## Features
|
|
27
|
+
|
|
28
|
+
- **Chain of Responsibility Pattern**: Easily define processing chains with different handlers performing specialized tasks.
|
|
29
|
+
- **Modular and Extensible**: Customize the chain by adding or removing handlers as needed.
|
|
30
|
+
- **Predefined Handlers**: A set of built-in handlers is provided for common tasks.
|
|
31
|
+
- **Dynamic Handler Discovery**: Automatically identify and instantiate handlers using the Factory pattern.
|
|
32
|
+
- **Seamless Integration**: Designed to integrate with larger applications, particularly when task delegation and flexible processing pipelines are required.
|
|
33
|
+
|
|
34
|
+
## Installation
|
|
35
|
+
|
|
36
|
+
You can install `awschain` directly from PyPI:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install awschain
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
|
|
44
|
+
## Example Use Case
|
|
45
|
+
|
|
46
|
+
Let’s say you want to process files by first reading their content, performing a summarization using Generative AI, and then writing the results to another location. You can achieve this by defining a chain with four handlers: `LocalFileReaderHandler`, `PromptHandler`, `AmazonBedrockHandler`, and `LocalFileWriterHandler`.
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from awschain import HandlerFactory, ConfigLoader
|
|
50
|
+
|
|
51
|
+
# Load config
|
|
52
|
+
ConfigLoader.load_config("/path/to/config.yaml")
|
|
53
|
+
|
|
54
|
+
# Create the handlers
|
|
55
|
+
reader = HandlerFactory.get_handler("LocalFileReaderHandler")
|
|
56
|
+
prompt_handler = HandlerFactory.get_handler("PromptHandler")
|
|
57
|
+
transformer = HandlerFactory.get_handler("AmazonBedrockHandler")
|
|
58
|
+
writer = HandlerFactory.get_handler("LocalFileWriterHandler")
|
|
59
|
+
|
|
60
|
+
# Set up the chain
|
|
61
|
+
reader.set_next(prompt_handler).set_next(transformer).set_next(writer)
|
|
62
|
+
|
|
63
|
+
# Store your prompts in a prompts/ folder at the root of your project. Example: prompts/default_prompt.txt
|
|
64
|
+
|
|
65
|
+
# Define the request
|
|
66
|
+
request = {"file_path": "example.txt", "write_file_path": "output.txt", "prompt": "default_prompt"}
|
|
67
|
+
|
|
68
|
+
# Execute the chain
|
|
69
|
+
reader.handle(request)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Built-in Handlers
|
|
73
|
+
|
|
74
|
+
`awschain` comes with several predefined handlers that can be used right out of the box. Examples include:
|
|
75
|
+
|
|
76
|
+
Readers:
|
|
77
|
+
- **LocalFileReaderHandler**: Handles local audio, video, and text files for processing.
|
|
78
|
+
- **AmazonS3ReaderHandler**: Manages the reading and downloading of S3 objects (files) from Amazon S3.
|
|
79
|
+
- **HTTPHandler**: Generic HTTP handler that allows you to fetch HTML data from http(s) endpoints. It uses BeautifulSoup to clean HTML tags.
|
|
80
|
+
|
|
81
|
+
- **PDFReaderHandler**: Extracts text from PDF documents for summarization.
|
|
82
|
+
- **MicrosoftExcelReaderHandler**: Extract text from Microsoft Excel documents.
|
|
83
|
+
- **MicrosoftWordReaderHandler**: Extract text from Microsoft Word documents.
|
|
84
|
+
- **QuipReaderHandler**: Extract text from Quip document.
|
|
85
|
+
- **YouTubeReaderHandler**: Downloads videos from YouTube URLs and extracts audio.
|
|
86
|
+
|
|
87
|
+
Processors:
|
|
88
|
+
- **AmazonBedrockHandler**: Summarizes text content using Amazon Bedrock.
|
|
89
|
+
- **AmazonBedrockChatHandler**: Used to perform interactive chat with Amazon Bedrock using the messages API.
|
|
90
|
+
- **AmazonComprehendInsightsHandler**: Extract valuable insights from your data using Amazon Comprehend NLP capabilities.
|
|
91
|
+
- **AmazonComprehendPIIHandler**, **AmazonComprehendPIITokenizeHandler** and **AmazonComprehendPIIUntokenizeHandler**: Used to detect, tokenize and untokenize PII data in your text retaining the context and allowing downstream services such as Bedrock to process the data without PII.
|
|
92
|
+
- **AmazonTranscriptionHandler**: Transcribes audio files into text using Amazon Transcribe.
|
|
93
|
+
- **AmazonTextractHandler**: Extracts text from images such as .jpg, .png, .tiff
|
|
94
|
+
- **HTMLCleanerHandler**: Used to clean HTML tags when consuming web page / HTML documents.
|
|
95
|
+
- **PromptHandler**: Uses a minimalistic prompt framework - all your prompts can be stored in the prompts/ folder and you can select which prompt to use when invoking the main.py.
|
|
96
|
+
|
|
97
|
+
Writers:
|
|
98
|
+
- **AmazonS3WriterHandler**: Manages the uploading of S3 objects (files) to Amazon S3.
|
|
99
|
+
- **LocalFileWriterHandler**: Writes output into a local file.
|
|
100
|
+
- **ClipboardWriterHandler**: Writes output into clipboard.
|
|
101
|
+
|
|
102
|
+
You can also create your own custom handlers by extending the base `Handler` class.
|
|
103
|
+
|
|
104
|
+
## Extending awschain
|
|
105
|
+
|
|
106
|
+
If you need to add custom functionality, you can extend the framework by writing custom handlers and integrating them into the chain.
|
|
107
|
+
|
|
108
|
+
To create a custom handler, simply subclass the `AbstractHandler` class and implement the `handle` method:
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
from awschain.handlers.abstract_handler import AbstractHandler
|
|
112
|
+
|
|
113
|
+
class CustomHandler(AbstractHandler):
|
|
114
|
+
def handle(self, request):
|
|
115
|
+
# Process the request
|
|
116
|
+
if request.get("custom"):
|
|
117
|
+
print("Handling custom request.")
|
|
118
|
+
# Pass to the next handler in the chain if applicable
|
|
119
|
+
return super().handle(request)
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Contributing
|
|
123
|
+
|
|
124
|
+
Contributions are welcome! Feel free to open an issue or submit a pull request if you have ideas to improve `awschain`.
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
`awschain` is licensed under the MIT License.
|
awschain-0.1.0/README.md
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
|
|
2
|
+
# awschain
|
|
3
|
+
|
|
4
|
+
`awschain` is a Python package that provides a flexible and extensible implementation of the **Chain of Responsibility** design pattern. It allows users to chain together multiple processing steps in a sequence of handlers, making it easier to create dynamic and modular processing pipelines. This package is ideal for scenarios where various operations need to be applied in sequence, such as file processing, API interactions, and data transformations.
|
|
5
|
+
|
|
6
|
+
## Features
|
|
7
|
+
|
|
8
|
+
- **Chain of Responsibility Pattern**: Easily define processing chains with different handlers performing specialized tasks.
|
|
9
|
+
- **Modular and Extensible**: Customize the chain by adding or removing handlers as needed.
|
|
10
|
+
- **Predefined Handlers**: A set of built-in handlers is provided for common tasks.
|
|
11
|
+
- **Dynamic Handler Discovery**: Automatically identify and instantiate handlers using the Factory pattern.
|
|
12
|
+
- **Seamless Integration**: Designed to integrate with larger applications, particularly when task delegation and flexible processing pipelines are required.
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
You can install `awschain` directly from PyPI:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install awschain
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Usage
|
|
23
|
+
|
|
24
|
+
## Example Use Case
|
|
25
|
+
|
|
26
|
+
Let’s say you want to process files by first reading their content, performing a summarization using Generative AI, and then writing the results to another location. You can achieve this by defining a chain with four handlers: `LocalFileReaderHandler`, `PromptHandler`, `AmazonBedrockHandler`, and `LocalFileWriterHandler`.
|
|
27
|
+
|
|
28
|
+
```python
|
|
29
|
+
from awschain import HandlerFactory, ConfigLoader
|
|
30
|
+
|
|
31
|
+
# Load config
|
|
32
|
+
ConfigLoader.load_config("/path/to/config.yaml")
|
|
33
|
+
|
|
34
|
+
# Create the handlers
|
|
35
|
+
reader = HandlerFactory.get_handler("LocalFileReaderHandler")
|
|
36
|
+
prompt_handler = HandlerFactory.get_handler("PromptHandler")
|
|
37
|
+
transformer = HandlerFactory.get_handler("AmazonBedrockHandler")
|
|
38
|
+
writer = HandlerFactory.get_handler("LocalFileWriterHandler")
|
|
39
|
+
|
|
40
|
+
# Set up the chain
|
|
41
|
+
reader.set_next(prompt_handler).set_next(transformer).set_next(writer)
|
|
42
|
+
|
|
43
|
+
# Store your prompts in a prompts/ folder at the root of your project. Example: prompts/default_prompt.txt
|
|
44
|
+
|
|
45
|
+
# Define the request
|
|
46
|
+
request = {"file_path": "example.txt", "write_file_path": "output.txt", "prompt": "default_prompt"}
|
|
47
|
+
|
|
48
|
+
# Execute the chain
|
|
49
|
+
reader.handle(request)
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### Built-in Handlers
|
|
53
|
+
|
|
54
|
+
`awschain` comes with several predefined handlers that can be used right out of the box. Examples include:
|
|
55
|
+
|
|
56
|
+
Readers:
|
|
57
|
+
- **LocalFileReaderHandler**: Handles local audio, video, and text files for processing.
|
|
58
|
+
- **AmazonS3ReaderHandler**: Manages the reading and downloading of S3 objects (files) from Amazon S3.
|
|
59
|
+
- **HTTPHandler**: Generic HTTP handler that allows you to fetch HTML data from http(s) endpoints. It uses BeautifulSoup to clean HTML tags.
|
|
60
|
+
|
|
61
|
+
- **PDFReaderHandler**: Extracts text from PDF documents for summarization.
|
|
62
|
+
- **MicrosoftExcelReaderHandler**: Extract text from Microsoft Excel documents.
|
|
63
|
+
- **MicrosoftWordReaderHandler**: Extract text from Microsoft Word documents.
|
|
64
|
+
- **QuipReaderHandler**: Extract text from Quip document.
|
|
65
|
+
- **YouTubeReaderHandler**: Downloads videos from YouTube URLs and extracts audio.
|
|
66
|
+
|
|
67
|
+
Processors:
|
|
68
|
+
- **AmazonBedrockHandler**: Summarizes text content using Amazon Bedrock.
|
|
69
|
+
- **AmazonBedrockChatHandler**: Used to perform interactive chat with Amazon Bedrock using the messages API.
|
|
70
|
+
- **AmazonComprehendInsightsHandler**: Extract valuable insights from your data using Amazon Comprehend NLP capabilities.
|
|
71
|
+
- **AmazonComprehendPIIHandler**, **AmazonComprehendPIITokenizeHandler** and **AmazonComprehendPIIUntokenizeHandler**: Used to detect, tokenize and untokenize PII data in your text retaining the context and allowing downstream services such as Bedrock to process the data without PII.
|
|
72
|
+
- **AmazonTranscriptionHandler**: Transcribes audio files into text using Amazon Transcribe.
|
|
73
|
+
- **AmazonTextractHandler**: Extracts text from images such as .jpg, .png, .tiff
|
|
74
|
+
- **HTMLCleanerHandler**: Used to clean HTML tags when consuming web page / HTML documents.
|
|
75
|
+
- **PromptHandler**: Uses a minimalistic prompt framework - all your prompts can be stored in the prompts/ folder and you can select which prompt to use when invoking the main.py.
|
|
76
|
+
|
|
77
|
+
Writers:
|
|
78
|
+
- **AmazonS3WriterHandler**: Manages the uploading of S3 objects (files) to Amazon S3.
|
|
79
|
+
- **LocalFileWriterHandler**: Writes output into a local file.
|
|
80
|
+
- **ClipboardWriterHandler**: Writes output into clipboard.
|
|
81
|
+
|
|
82
|
+
You can also create your own custom handlers by extending the base `Handler` class.
|
|
83
|
+
|
|
84
|
+
## Extending awschain
|
|
85
|
+
|
|
86
|
+
If you need to add custom functionality, you can extend the framework by writing custom handlers and integrating them into the chain.
|
|
87
|
+
|
|
88
|
+
To create a custom handler, simply subclass the `AbstractHandler` class and implement the `handle` method:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
from awschain.handlers.abstract_handler import AbstractHandler
|
|
92
|
+
|
|
93
|
+
class CustomHandler(AbstractHandler):
|
|
94
|
+
def handle(self, request):
|
|
95
|
+
# Process the request
|
|
96
|
+
if request.get("custom"):
|
|
97
|
+
print("Handling custom request.")
|
|
98
|
+
# Pass to the next handler in the chain if applicable
|
|
99
|
+
return super().handle(request)
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Contributing
|
|
103
|
+
|
|
104
|
+
Contributions are welcome! Feel free to open an issue or submit a pull request if you have ideas to improve `awschain`.
|
|
105
|
+
|
|
106
|
+
## License
|
|
107
|
+
|
|
108
|
+
`awschain` is licensed under the MIT License.
|
awschain-0.1.0/setup.cfg
ADDED
awschain-0.1.0/setup.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import os
from setuptools import setup, find_packages

# The README doubles as the long description shown on the package index.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Conditionally load requirements.txt if it exists.
# Blank lines and "#" comment lines are dropped: they are legal in a
# requirements file but are not valid requirement specifiers for
# install_requires.
requirements = []
if os.path.exists("requirements.txt"):
    with open("requirements.txt", "r", encoding="utf-8") as f:
        for raw_line in f:
            requirement = raw_line.strip()
            if requirement and not requirement.startswith("#"):
                requirements.append(requirement)

setup(
    name="awschain",
    version="0.1.0",
    author="Kamen Sharlandjiev",
    author_email="ksharlandjiev@gmail.com",
    description="A framework for chaining AWS services using the chain of responsibility pattern",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/ksharlandjiev/awschain",
    # Packages live under src/ (src layout).
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    include_package_data=True,
    package_data={"awschain": ["**/*.py"]},
    zip_safe=False,
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
    ],
    python_requires=">=3.7",
    install_requires=requirements,  # Empty when requirements.txt doesn't exist
    test_suite='tests',
)
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
#!/opt/anaconda3/bin/python
|
|
2
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
from typing import Any
|
|
8
|
+
from dotenv import load_dotenv
|
|
9
|
+
from handlers.handler_factory import HandlerFactory
|
|
10
|
+
from .utils.config_loader import ConfigLoader
|
|
11
|
+
import argparse
|
|
12
|
+
|
|
13
|
+
# Load configuration
|
|
14
|
+
ConfigLoader.load_config('config.yaml')
|
|
15
|
+
|
|
16
|
+
# Load environment variables from .env file
|
|
17
|
+
load_dotenv()
|
|
18
|
+
|
|
19
|
+
def determine_input_type(file_path):
    """Classify a path or URL into the input type used to pick a handler chain.

    Matching is case-insensitive, so ``REPORT.PDF`` and ``HTTPS://...`` are
    recognized the same way as their lowercase forms.

    Args:
        file_path: Local path, ``http(s)://`` URL, ``s3://`` URI, ``quip://``
            URI or YouTube link, as a string.

    Returns:
        One of ``"youtube_url"``, ``"http"``, ``"s3"``, ``"quip"``,
        ``"multimedia_file"``, ``"pdf"``, ``"microsoft_word"``,
        ``"microsoft_excel"``, ``"microsoft_pp"``, ``"image_file"`` or
        ``"text_or_json"``. Anything unrecognized is assumed to be text and
        yields ``"text_or_json"``.
    """
    normalized = file_path.lower()

    # NOTE: this is a plain substring test, so it also matches local file
    # names that merely contain "youtube".
    if "youtube" in normalized or "youtu.be" in normalized:
        return "youtube_url"

    # Require the full scheme separator so local names that merely start
    # with "http" (e.g. "httpd.conf") are not treated as URLs.
    if normalized.startswith(("http://", "https://")):
        return "http"
    if normalized.startswith("s3://"):
        return "s3"
    if normalized.startswith("quip://"):
        return "quip"

    # Extension groups, checked in order; the first match wins.
    suffix_types = (
        (('.mp3', '.mp4', '.m4a', '.wav', '.flac', '.mov', '.avi'), "multimedia_file"),
        (('.pdf',), "pdf"),
        (('.docx',), "microsoft_word"),
        (('.xlsx', '.xlsm', '.xltx', '.xltm'), "microsoft_excel"),
        (('.pptx',), "microsoft_pp"),
        (('.jpg', '.jpeg', '.png', '.tiff'), "image_file"),
    )
    for suffixes, input_type in suffix_types:
        if normalized.endswith(suffixes):
            return input_type

    # .txt, .json and every unknown extension: assume text.
    return "text_or_json"
|
|
45
|
+
|
|
46
|
+
def construct_chain(input_type, args):
    """Build the handler chain for ``input_type`` and return its first handler.

    The chain starts with the reader/extraction handlers for the input type,
    optionally followed by PII tokenization, then the prompt/Bedrock and/or
    chat handlers selected by ``args.chat``, optional PII untokenization, and
    finally an optional clipboard writer.

    Args:
        input_type: Type string such as ``"pdf"`` or ``"http"`` (see the
            keys of ``reader_chains`` below).
        args: Object exposing ``anonymize`` (``True`` or a string; "true"/"1"
            in any letter case enable anonymization) and ``chat`` (``None``,
            ``'sum_first'``, ``'chat_only'``, or any other truthy value,
            which is treated as chat-first).

    Returns:
        The first handler of the assembled chain.

    Raises:
        SystemExit: With code 1 when ``input_type`` is not supported.

    Note:
        Relies on ``set_next`` returning the handler it was given, as the
        chained ``a.set_next(b).set_next(c)`` calls require.
    """
    # Handler names per input type, listed in chain order.
    reader_chains = {
        "youtube_url": (
            "YouTubeReaderHandler",
            "AmazonS3WriterHandler",
            "AmazonTranscriptionHandler",
            "LocalFileWriterHandler",
        ),
        "multimedia_file": (
            "AmazonS3WriterHandler",
            "AmazonTranscriptionHandler",
            "LocalFileWriterHandler",
        ),
        "multimedia_file_whisper": (
            "OpenAIWhisperTranscriptionHandler",
            "LocalFileWriterHandler",
        ),
        "image_file": (
            "LocalFileReaderHandler",
            "AmazonTextractHandler",
            "LocalFileWriterHandler",
        ),
        "pdf": ("PDFReaderHandler", "LocalFileWriterHandler"),
        "http": ("HTTPHandler", "HTMLCleanerHandler", "LocalFileWriterHandler"),
        "text_or_json": ("LocalFileReaderHandler",),
        "s3": ("AmazonS3ReaderHandler",),
        "quip": ("QuipReaderHandler", "HTMLCleanerHandler", "LocalFileWriterHandler"),
        "microsoft_word": ("MicrosoftWordReaderHandler", "LocalFileWriterHandler"),
        "microsoft_excel": ("MicrosoftExcelReaderHandler", "LocalFileWriterHandler"),
        "microsoft_pp": ("MicrosoftPowerPointReaderHandler", "LocalFileWriterHandler"),
    }

    handler_names = reader_chains.get(input_type)
    if handler_names is None:
        # Unsupported types abort the program; there is no fallback chain.
        print("Unsupported file type.", input_type)
        sys.exit(1)

    # Instantiate every handler first, then link them in order.
    handlers = [HandlerFactory.get_handler(name) for name in handler_names]
    chain = current_handler = handlers[0]
    for handler in handlers[1:]:
        current_handler = current_handler.set_next(handler)

    # Anonymize data? Accept the boolean default as well as "true"/"1" given
    # on the command line in any letter case (e.g. "--anonymize True").
    anonymize_flag = args.anonymize
    anonymize = (
        anonymize_flag in (True, 'true', '1')
        or str(anonymize_flag).strip().lower() in ('true', '1')
    )
    if anonymize:
        anonymize_handler = HandlerFactory.get_handler("AmazonComprehendPIITokenizeHandler")
        current_handler = current_handler.set_next(anonymize_handler)

    # Add the prompt and bedrock handlers.
    prompt_handler = HandlerFactory.get_handler("PromptHandler")
    bedrock_handler = HandlerFactory.get_handler("AmazonBedrockHandler")

    # Determine when / if we need to call summarization or chat and in what order.
    if args.chat:
        chat_handler = HandlerFactory.get_handler("AmazonBedrockChatHandler")
        print("Enable chat", args)
        if args.chat == 'sum_first':
            # Summarize, then chat about the result.
            current_handler = current_handler.set_next(prompt_handler).set_next(bedrock_handler).set_next(chat_handler)
        elif args.chat == 'chat_only':
            # Chat only; the prompt/Bedrock handlers are not linked.
            current_handler = current_handler.set_next(chat_handler)
        else:
            # Chat first, then summarize.
            current_handler = current_handler.set_next(chat_handler)
            current_handler = current_handler.set_next(prompt_handler).set_next(bedrock_handler)
    else:
        current_handler = current_handler.set_next(prompt_handler).set_next(bedrock_handler)

    # Finally, if we have tokenized the content, let's untokenize
    if anonymize:
        unanonymize_handler = HandlerFactory.get_handler("AmazonComprehendPIIUntokenizeHandler")
        current_handler = current_handler.set_next(unanonymize_handler)

    # Copy to clipboard?
    clipboard = os.getenv('CLIPBOARD_COPY', 'false').lower() in ('true', '1', 't')
    if clipboard:
        clipboard_handler = HandlerFactory.get_handler("ClipboardWriterHandler")
        current_handler = current_handler.set_next(clipboard_handler)
        print("\n\n ================================================\n The summary will be copied to your clipboard.\n ================================================\n")

    return chain
|
|
170
|
+
|
|
171
|
+
def process_file(file_path, args):
    """Run one file or URL through the handler chain built for its type.

    Args:
        file_path: Path or URL to process.
        args: Parsed arguments; ``prompt_file_name`` is read here and the
            whole object is forwarded to ``construct_chain``.

    Returns:
        Whatever the first handler's ``handle`` call returns.
    """
    print(f"Processing: {file_path}")

    kind = determine_input_type(file_path)
    first_handler = construct_chain(kind, args)

    # Output file name carries the source base name and a timestamp.
    stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    storage_dir = os.getenv('DIR_STORAGE', './downloads')
    source_name = os.path.basename(file_path)
    destination = f"{storage_dir}/output_{source_name}_{stamp}.txt"

    payload = dict(
        type=kind,
        path=file_path,
        prompt_file_name=args.prompt_file_name,
        text="",
        write_file_path=destination,
        extract_media=True,
    )
    return first_handler.handle(payload)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _print_result(result):
    """Print one chain result.

    Prints the value stored under ``"text"`` when the result is a dict with a
    non-empty text; otherwise dumps the whole result as indented JSON.
    """
    text = result.get("text") if isinstance(result, dict) else None
    if text:
        print(text)
    else:
        print(json.dumps(result, indent=2))


def main():
    """Command-line entry point.

    Parses the arguments, loads the configuration, discovers the available
    handlers and then processes ``path``: every regular file directly inside
    it when it is a directory, or the path itself otherwise. Each result is
    printed as soon as it is available.
    """
    # Initialize the argument parser
    parser = argparse.ArgumentParser(description='Process input files or URLs.')

    # Required positional argument for the file/URL to process
    parser.add_argument('path', type=str, help='The path to the file, folder or URL to be processed.')

    # Optional positional argument for the prompt file name, with a default value
    parser.add_argument('prompt_file_name', nargs='?', default='default_prompt', help='The name of the prompt file. Defaults to "default_prompt" if not specified.')

    # Optional flag to specify the use of an interactive chat handler
    parser.add_argument("--chat", type=str, default=None, choices=[None, 'sum_first', 'chat_first', 'chat_only'], help="Choose 'sum_first', 'chat_first' to summarize before chat or direct chat interaction with your original text. Select 'chat_only' if you only want to query your original text")

    # Optional flag to turn off anonymization
    # NOTE(review): with type=str any value given on the command line
    # (including "false") is a non-empty, truthy string while the default is
    # the bool True. Confirm how the chain builder interprets this value
    # before relying on "--anonymize false".
    parser.add_argument("--anonymize", type=str, default=True, help="Anonymize customer names before sending to the model. By default this is set to true.")

    # Optional argument for config file path
    parser.add_argument("--config", type=str, help="Path to the config file. If not provided, will search for config.yaml in the current directory and its parents.")

    # Parse the command-line arguments
    args = parser.parse_args()

    # Load configuration (imported inside the function, as before)
    from awschain.utils.config_loader import ConfigLoader
    ConfigLoader.load_config(args.config)

    # Handler discovery
    HandlerFactory.discover_handlers()

    if os.path.isdir(args.path):
        max_processes = int(ConfigLoader.get_config('MAX_PARALLEL_PROCESSES', 1))
        candidates = (os.path.join(args.path, name) for name in os.listdir(args.path))
        with ThreadPoolExecutor(max_workers=max_processes) as executor:
            futures = [executor.submit(process_file, path, args) for path in candidates if os.path.isfile(path)]
            # Report every file's result as it completes. Keeping only the
            # last one dropped all other outputs and left nothing to print
            # (an unbound variable) when the directory contained no files.
            for future in as_completed(futures):
                _print_result(future.result())
    else:
        _print_result(process_file(args.path, args))


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from abc import abstractmethod
|
|
3
|
+
from typing import Any
|
|
4
|
+
from .base_handler import BaseHandler
|
|
5
|
+
|
|
6
|
+
class AbstractHandler(BaseHandler):
    """
    Default chaining behaviour for handlers.

    Stores a reference to the next handler and, in ``handle``, forwards the
    request to it. ``handle`` stays abstract, so concrete handlers must
    override it; they can reach this forwarding logic through
    ``super().handle(request)``.
    """

    # Successor in the chain; None while this handler is the last link.
    _next_handler: BaseHandler | None = None

    def set_next(self, handler: BaseHandler) -> BaseHandler:
        """Set ``handler`` as the successor and return it.

        Returning the handler that was passed in lets calls be chained:
        local_file_handler.set_next(prompt_handler).set_next(summarization_handler)
        """
        self._next_handler = handler
        return handler

    @abstractmethod
    def handle(self, request: dict) -> dict:
        """Forward ``request`` to the next handler, if any.

        Returns the next handler's result, or ``request`` itself when no
        successor has been set.
        """
        # Compare against None explicitly so a successor is never skipped
        # merely because the object happens to evaluate as falsy.
        if self._next_handler is not None:
            return self._next_handler.handle(request)

        return request
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
class BaseHandler(ABC):
    """
    The handler interface.

    Declares a method for building the chain of handlers and a method for
    executing a request. Both are abstract, so the class cannot be
    instantiated until a subclass overrides them.
    """

    @abstractmethod
    def set_next(self, handler: BaseHandler) -> BaseHandler:
        """Register ``handler`` as the next link in the chain and return a handler."""
        pass

    @abstractmethod
    def handle(self, request) -> Optional[dict]:
        """Process ``request`` and return a dict, or None."""
        pass
|