awschain 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. awschain-0.1.0/LICENSE +21 -0
  2. awschain-0.1.0/MANIFEST.in +9 -0
  3. awschain-0.1.0/PKG-INFO +128 -0
  4. awschain-0.1.0/README.md +108 -0
  5. awschain-0.1.0/pyproject.toml +3 -0
  6. awschain-0.1.0/requirements.txt +3 -0
  7. awschain-0.1.0/setup.cfg +4 -0
  8. awschain-0.1.0/setup.py +39 -0
  9. awschain-0.1.0/src/awschain/__init__.py +4 -0
  10. awschain-0.1.0/src/awschain/example.py +240 -0
  11. awschain-0.1.0/src/awschain/handlers/__init__.py +5 -0
  12. awschain-0.1.0/src/awschain/handlers/abstract_handler.py +26 -0
  13. awschain-0.1.0/src/awschain/handlers/base_handler.py +17 -0
  14. awschain-0.1.0/src/awschain/handlers/handler_factory.py +57 -0
  15. awschain-0.1.0/src/awschain/handlers/misc/__init__.py +0 -0
  16. awschain-0.1.0/src/awschain/handlers/misc/clipboard_writer_handler.py +8 -0
  17. awschain-0.1.0/src/awschain/handlers/misc/print_context_handler.py +14 -0
  18. awschain-0.1.0/src/awschain/handlers/misc/remote_file_downloader_handler.py +43 -0
  19. awschain-0.1.0/src/awschain/handlers/processors/__init__.py +0 -0
  20. awschain-0.1.0/src/awschain/handlers/processors/amazon_bedrock_chat_handler.py +32 -0
  21. awschain-0.1.0/src/awschain/handlers/processors/amazon_bedrock_handler.py +62 -0
  22. awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_insights_handler.py +95 -0
  23. awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_pii_classifier_handler.py +42 -0
  24. awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_pii_handler.py +48 -0
  25. awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_pii_tokenize_handler.py +119 -0
  26. awschain-0.1.0/src/awschain/handlers/processors/amazon_comprehend_pii_untokenize_handler.py +46 -0
  27. awschain-0.1.0/src/awschain/handlers/processors/amazon_rekognition_handler.py +66 -0
  28. awschain-0.1.0/src/awschain/handlers/processors/amazon_textract_handler.py +138 -0
  29. awschain-0.1.0/src/awschain/handlers/processors/amazon_transcribe_handler.py +91 -0
  30. awschain-0.1.0/src/awschain/handlers/processors/anonymize_handler.py +30 -0
  31. awschain-0.1.0/src/awschain/handlers/processors/html_cleaner_handler.py +11 -0
  32. awschain-0.1.0/src/awschain/handlers/processors/prompt_handler.py +27 -0
  33. awschain-0.1.0/src/awschain/handlers/readers/__init__.py +0 -0
  34. awschain-0.1.0/src/awschain/handlers/readers/amazon_s3_reader_handler.py +35 -0
  35. awschain-0.1.0/src/awschain/handlers/readers/aws_secrets_manager_secret_reader.py +50 -0
  36. awschain-0.1.0/src/awschain/handlers/readers/email_reader_handler.py +83 -0
  37. awschain-0.1.0/src/awschain/handlers/readers/http_handler.py +140 -0
  38. awschain-0.1.0/src/awschain/handlers/readers/local_file_reader_handler.py +22 -0
  39. awschain-0.1.0/src/awschain/handlers/readers/microsoft_excel_reader_handler.py +84 -0
  40. awschain-0.1.0/src/awschain/handlers/readers/microsoft_power_point_reader_handler.py +95 -0
  41. awschain-0.1.0/src/awschain/handlers/readers/microsoft_word_handler.py +112 -0
  42. awschain-0.1.0/src/awschain/handlers/readers/pdf_reader_handler.py +81 -0
  43. awschain-0.1.0/src/awschain/handlers/readers/quip_reader_handler.py +148 -0
  44. awschain-0.1.0/src/awschain/handlers/readers/web_crawler_reader_handler.py +52 -0
  45. awschain-0.1.0/src/awschain/handlers/readers/youtube_reader_handler.py +39 -0
  46. awschain-0.1.0/src/awschain/handlers/writers/__init__.py +0 -0
  47. awschain-0.1.0/src/awschain/handlers/writers/amazon_datazone_asset_writer_handler.py +87 -0
  48. awschain-0.1.0/src/awschain/handlers/writers/amazon_datazone_glossary_writer_handler.py +92 -0
  49. awschain-0.1.0/src/awschain/handlers/writers/amazon_s3_writer_handler.py +118 -0
  50. awschain-0.1.0/src/awschain/handlers/writers/email_sender_handler.py +56 -0
  51. awschain-0.1.0/src/awschain/handlers/writers/local_file_writer_handler.py +51 -0
  52. awschain-0.1.0/src/awschain/handlers/writers/quip_writer_handler.py +85 -0
  53. awschain-0.1.0/src/awschain/utils/__init__.py +2 -0
  54. awschain-0.1.0/src/awschain/utils/aws_boto_client_manager.py +16 -0
  55. awschain-0.1.0/src/awschain/utils/bedrock.py +70 -0
  56. awschain-0.1.0/src/awschain/utils/config.py +1 -0
  57. awschain-0.1.0/src/awschain/utils/config_loader.py +43 -0
  58. awschain-0.1.0/src/awschain/utils/web_utils.py +22 -0
  59. awschain-0.1.0/src/awschain.egg-info/PKG-INFO +128 -0
  60. awschain-0.1.0/src/awschain.egg-info/SOURCES.txt +63 -0
  61. awschain-0.1.0/src/awschain.egg-info/dependency_links.txt +1 -0
  62. awschain-0.1.0/src/awschain.egg-info/not-zip-safe +1 -0
  63. awschain-0.1.0/src/awschain.egg-info/requires.txt +3 -0
  64. awschain-0.1.0/src/awschain.egg-info/top_level.txt +1 -0
  65. awschain-0.1.0/tests/test_misc_handlers.py +43 -0
awschain-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Kamen Sharlandjiev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,9 @@
1
+ include README.md
2
+ include requirements.txt
3
+ recursive-include src *.py
4
+ recursive-include src/awschain *.py
5
+ recursive-include src/awschain/handlers *.py
6
+ global-exclude __pycache__
7
+ global-exclude *.py[cod]
8
+ include src/awschain/__init__.py
9
+ include src/awschain/handlers/__init__.py
@@ -0,0 +1,128 @@
1
+ Metadata-Version: 2.1
2
+ Name: awschain
3
+ Version: 0.1.0
4
+ Summary: A framework for chaining AWS services using the chain of responsibility pattern
5
+ Home-page: https://github.com/ksharlandjiev/awschain
6
+ Author: Kamen Sharlandjiev
7
+ Author-email: ksharlandjiev@gmail.com
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3.8
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Requires-Python: >=3.7
15
+ Description-Content-Type: text/markdown
16
+ License-File: LICENSE
17
+ Requires-Dist: requests
18
+ Requires-Dist: boto3
19
+ Requires-Dist: python-dotenv
20
+
21
+
22
+ # awschain
23
+
24
+ `awschain` is a Python package that provides a flexible and extensible implementation of the **Chain of Responsibility** design pattern. It allows users to chain together multiple processing steps in a sequence of handlers, making it easier to create dynamic and modular processing pipelines. This package is ideal for scenarios where various operations need to be applied in sequence, such as file processing, API interactions, and data transformations.
25
+
26
+ ## Features
27
+
28
+ - **Chain of Responsibility Pattern**: Easily define processing chains with different handlers performing specialized tasks.
29
+ - **Modular and Extensible**: Customize the chain by adding or removing handlers as needed.
30
+ - **Predefined Handlers**: A set of built-in handlers is provided for common tasks.
31
+ - **Dynamic Handler Discovery**: Automatically identify and instantiate handlers using the Factory pattern.
32
+ - **Seamless Integration**: Designed to integrate with larger applications, particularly when task delegation and flexible processing pipelines are required.
33
+
34
+ ## Installation
35
+
36
+ You can install `awschain` directly from PyPI:
37
+
38
+ ```bash
39
+ pip install awschain
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ ## Example Use Case
45
+
46
+ Let’s say you want to process files by first reading their content, performing a summarization using Generative AI, and then writing the results to another location. You can achieve this by defining a chain with four handlers: `LocalFileReaderHandler`, `PromptHandler`, `AmazonBedrockHandler`, and `LocalFileWriterHandler`.
47
+
48
+ ```python
49
+ from awschain import HandlerFactory, ConfigLoader
50
+
51
+ # Load config
52
+ ConfigLoader.load_config("/path/to/config.yaml")
53
+
54
+ # Create the handlers
55
+ reader = HandlerFactory.get_handler("LocalFileReaderHandler")
56
+ prompt_handler = HandlerFactory.get_handler("PromptHandler")
57
+ transformer = HandlerFactory.get_handler("AmazonBedrockHandler")
58
+ writer = HandlerFactory.get_handler("LocalFileWriterHandler")
59
+
60
+ # Set up the chain
61
+ reader.set_next(prompt_handler).set_next(transformer).set_next(writer)
62
+
63
+ # Please store your prompt in your root of your project in prompts folder. Example: prompts/default_prompt.txt
64
+
65
+ # Define the request
66
+ request = {"file_path": "example.txt", "write_file_path": "output.txt", "prompt": "default_prompt"}
67
+
68
+ # Execute the chain
69
+ reader.handle(request)
70
+ ```
71
+
72
+ ### Built-in Handlers
73
+
74
+ `awschain` comes with several predefined handlers that can be used right out of the box. Examples include:
75
+
76
+ Readers:
77
+ - **LocalFileReaderHandler**: Handles local audio, video, and text files for processing.
78
+ - **AmazonS3ReaderHandler**: Manages the reading and downloading of S3 objects (files) from Amazon S3.
79
+ - **HTTPHandler**: Generic HTTP handler that allows you to fetch HTML data from http(s) endpoints. It uses BeautifulSoup to clean HTML tags.
80
+
81
+ - **PDFReaderHandler**: Extracts text from PDF documents for summarization.
82
+ - **MicrosoftExcelReaderHandler**: Extract text from Microsoft Excel documents.
83
+ - **MicrosoftWordReaderHandler**: Extract text from Microsoft Word documents.
84
+ - **QuipReaderHandler**: Extract text from Quip document.
85
+ - **YouTubeReaderHandler**: Downloads videos from YouTube URLs and extracts audio.
86
+
87
+ Processors:
88
+ - **AmazonBedrockHandler**: Summarizes text content using Amazon Bedrock.
89
+ - **AmazonBedrockChatHandler**: Used to perform interactive chat with Amazon Bedrock using the messages API.
90
+ - **AmazonComprehendInsightsHandler**: Extract valuable insights from your data using Amazon Comprehend NLP capabilities.
91
+ - **AmazonComprehendPIIHandler**, **AmazonComprehendPIITokenizeHandler** and **AmazonComprehendPIIUntokenizeHandler**: Used to detect, tokenize and untokenize PII data in your text retaining the context and allowing downstream services such as Bedrock to process the data without PII.
92
+ - **AmazonTranscriptionHandler**: Transcribes audio files into text using Amazon Transcribe.
93
+ - **AmazonTextractHandler**: Extracts text from images such as .jpg, .png, .tiff
94
+ - **HTMLCleanerHandler**: Used to clean HTML tags when consuming web page / HTML documents.
95
+ - **PromptHandler**: Uses a minimalistic prompt framework - all your prompts can be stored in the prompts/ folder and you can select which prompt to use when invoking the main.py.
96
+
97
+ Writers:
98
+ - **AmazonS3WriterHandler**: Manages the uploading of S3 objects (files) to Amazon S3.
99
+ - **LocalFileWriterHandler**: Writes output into a local file.
100
+ - **ClipboardWriterHandler**: Writes output into clipboard.
101
+
102
+ You can also create your own custom handlers by extending the base `AbstractHandler` class.
103
+
104
+ ## Extending awschain
105
+
106
+ If you need to add custom functionality, you can extend the framework by writing custom handlers and integrating them into the chain.
107
+
108
+ To create a custom handler, simply subclass the `AbstractHandler` class and implement the `handle` method:
109
+
110
+ ```python
111
+ from awschain.handlers.abstract_handler import AbstractHandler
112
+
113
+ class CustomHandler(AbstractHandler):
114
+ def handle(self, request):
115
+ # Process the request
116
+ if request.get("custom"):
117
+ print("Handling custom request.")
118
+ # Pass to the next handler in the chain if applicable
119
+ return super().handle(request)
120
+ ```
121
+
122
+ ## Contributing
123
+
124
+ Contributions are welcome! Feel free to open an issue or submit a pull request if you have ideas to improve `awschain`.
125
+
126
+ ## License
127
+
128
+ `awschain` is licensed under the MIT License.
@@ -0,0 +1,108 @@
1
+
2
+ # awschain
3
+
4
+ `awschain` is a Python package that provides a flexible and extensible implementation of the **Chain of Responsibility** design pattern. It allows users to chain together multiple processing steps in a sequence of handlers, making it easier to create dynamic and modular processing pipelines. This package is ideal for scenarios where various operations need to be applied in sequence, such as file processing, API interactions, and data transformations.
5
+
6
+ ## Features
7
+
8
+ - **Chain of Responsibility Pattern**: Easily define processing chains with different handlers performing specialized tasks.
9
+ - **Modular and Extensible**: Customize the chain by adding or removing handlers as needed.
10
+ - **Predefined Handlers**: A set of built-in handlers is provided for common tasks.
11
+ - **Dynamic Handler Discovery**: Automatically identify and instantiate handlers using the Factory pattern.
12
+ - **Seamless Integration**: Designed to integrate with larger applications, particularly when task delegation and flexible processing pipelines are required.
13
+
14
+ ## Installation
15
+
16
+ You can install `awschain` directly from PyPI:
17
+
18
+ ```bash
19
+ pip install awschain
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ ## Example Use Case
25
+
26
+ Let’s say you want to process files by first reading their content, performing a summarization using Generative AI, and then writing the results to another location. You can achieve this by defining a chain with four handlers: `LocalFileReaderHandler`, `PromptHandler`, `AmazonBedrockHandler`, and `LocalFileWriterHandler`.
27
+
28
+ ```python
29
+ from awschain import HandlerFactory, ConfigLoader
30
+
31
+ # Load config
32
+ ConfigLoader.load_config("/path/to/config.yaml")
33
+
34
+ # Create the handlers
35
+ reader = HandlerFactory.get_handler("LocalFileReaderHandler")
36
+ prompt_handler = HandlerFactory.get_handler("PromptHandler")
37
+ transformer = HandlerFactory.get_handler("AmazonBedrockHandler")
38
+ writer = HandlerFactory.get_handler("LocalFileWriterHandler")
39
+
40
+ # Set up the chain
41
+ reader.set_next(prompt_handler).set_next(transformer).set_next(writer)
42
+
43
+ # Please store your prompt in your root of your project in prompts folder. Example: prompts/default_prompt.txt
44
+
45
+ # Define the request
46
+ request = {"file_path": "example.txt", "write_file_path": "output.txt", "prompt": "default_prompt"}
47
+
48
+ # Execute the chain
49
+ reader.handle(request)
50
+ ```
51
+
52
+ ### Built-in Handlers
53
+
54
+ `awschain` comes with several predefined handlers that can be used right out of the box. Examples include:
55
+
56
+ Readers:
57
+ - **LocalFileReaderHandler**: Handles local audio, video, and text files for processing.
58
+ - **AmazonS3ReaderHandler**: Manages the reading and downloading of S3 objects (files) from Amazon S3.
59
+ - **HTTPHandler**: Generic HTTP handler that allows you to fetch HTML data from http(s) endpoints. It uses BeautifulSoup to clean HTML tags.
60
+
61
+ - **PDFReaderHandler**: Extracts text from PDF documents for summarization.
62
+ - **MicrosoftExcelReaderHandler**: Extract text from Microsoft Excel documents.
63
+ - **MicrosoftWordReaderHandler**: Extract text from Microsoft Word documents.
64
+ - **QuipReaderHandler**: Extract text from Quip document.
65
+ - **YouTubeReaderHandler**: Downloads videos from YouTube URLs and extracts audio.
66
+
67
+ Processors:
68
+ - **AmazonBedrockHandler**: Summarizes text content using Amazon Bedrock.
69
+ - **AmazonBedrockChatHandler**: Used to perform interactive chat with Amazon Bedrock using the messages API.
70
+ - **AmazonComprehendInsightsHandler**: Extract valuable insights from your data using Amazon Comprehend NLP capabilities.
71
+ - **AmazonComprehendPIIHandler**, **AmazonComprehendPIITokenizeHandler** and **AmazonComprehendPIIUntokenizeHandler**: Used to detect, tokenize and untokenize PII data in your text retaining the context and allowing downstream services such as Bedrock to process the data without PII.
72
+ - **AmazonTranscriptionHandler**: Transcribes audio files into text using Amazon Transcribe.
73
+ - **AmazonTextractHandler**: Extracts text from images such as .jpg, .png, .tiff
74
+ - **HTMLCleanerHandler**: Used to clean HTML tags when consuming web page / HTML documents.
75
+ - **PromptHandler**: Uses a minimalistic prompt framework - all your prompts can be stored in the prompts/ folder and you can select which prompt to use when invoking the main.py.
76
+
77
+ Writers:
78
+ - **AmazonS3WriterHandler**: Manages the uploading of S3 objects (files) to Amazon S3.
79
+ - **LocalFileWriterHandler**: Writes output into a local file.
80
+ - **ClipboardWriterHandler**: Writes output into clipboard.
81
+
82
+ You can also create your own custom handlers by extending the base `AbstractHandler` class.
83
+
84
+ ## Extending awschain
85
+
86
+ If you need to add custom functionality, you can extend the framework by writing custom handlers and integrating them into the chain.
87
+
88
+ To create a custom handler, simply subclass the `AbstractHandler` class and implement the `handle` method:
89
+
90
+ ```python
91
+ from awschain.handlers.abstract_handler import AbstractHandler
92
+
93
+ class CustomHandler(AbstractHandler):
94
+ def handle(self, request):
95
+ # Process the request
96
+ if request.get("custom"):
97
+ print("Handling custom request.")
98
+ # Pass to the next handler in the chain if applicable
99
+ return super().handle(request)
100
+ ```
101
+
102
+ ## Contributing
103
+
104
+ Contributions are welcome! Feel free to open an issue or submit a pull request if you have ideas to improve `awschain`.
105
+
106
+ ## License
107
+
108
+ `awschain` is licensed under the MIT License.
@@ -0,0 +1,3 @@
1
+ [build-system]
2
+ requires = ["setuptools", "wheel"]
3
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,3 @@
1
+ requests
2
+ boto3
3
+ python-dotenv
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,39 @@
1
# Packaging script for awschain: reads the README and requirements.txt from the
# current directory and hands the metadata to setuptools.
import os
from setuptools import setup, find_packages

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Conditionally load requirements.txt if it exists.
# Blank lines and "#" comment lines are dropped so they never reach
# install_requires, where they are not valid requirement specifiers.
if os.path.exists("requirements.txt"):
    with open("requirements.txt", "r", encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        requirements = [
            line for line in stripped_lines if line and not line.startswith("#")
        ]
else:
    requirements = []

setup(
    name="awschain",
    version="0.1.0",
    author="Kamen Sharlandjiev",
    author_email="ksharlandjiev@gmail.com",
    description="A framework for chaining AWS services using the chain of responsibility pattern",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/ksharlandjiev/awschain",
    # Packages live under src/ (src layout).
    packages=find_packages(where="src"),
    package_dir={"": "src"},
    include_package_data=True,
    package_data={"awschain": ["**/*.py"]},
    zip_safe=False,
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
    ],
    python_requires=">=3.7",
    install_requires=requirements,  # Empty when requirements.txt doesn't exist
    test_suite='tests',
)
@@ -0,0 +1,4 @@
1
+ from .handlers.handler_factory import HandlerFactory
2
+ from .utils.config_loader import ConfigLoader
3
+
4
+ __all__ = ['HandlerFactory', 'ConfigLoader']
@@ -0,0 +1,240 @@
1
#!/opt/anaconda3/bin/python
"""Command-line example that builds an awschain handler chain and runs it."""
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
import json
import os
import sys
from typing import Any
from dotenv import load_dotenv
# Relative import, matching the ConfigLoader import below: there is no
# top-level ``handlers`` package, only ``awschain.handlers``.
from .handlers.handler_factory import HandlerFactory
from .utils.config_loader import ConfigLoader
import argparse

# Load configuration
ConfigLoader.load_config('config.yaml')

# Load environment variables from .env file
load_dotenv()
18
+
19
def determine_input_type(file_path):
    """Classify ``file_path`` so the matching handler chain can be built.

    Checks run in priority order: YouTube links, HTTP(S) URLs, ``s3://`` and
    ``quip://`` locations, then the file extension. Anything that matches none
    of these is treated as plain text.

    Args:
        file_path: Local path, URL, or ``s3://`` / ``quip://`` location.

    Returns:
        One of ``"youtube_url"``, ``"http"``, ``"s3"``, ``"quip"``,
        ``"multimedia_file"``, ``"pdf"``, ``"microsoft_word"``,
        ``"microsoft_excel"``, ``"microsoft_pp"``, ``"image_file"`` or
        ``"text_or_json"``.
    """
    if "youtube" in file_path or "youtu.be" in file_path:
        return "youtube_url"
    # Require the full scheme so a local file such as "http_notes.txt" is not
    # mistaken for a URL.
    if file_path.startswith(("http://", "https://")):
        return "http"
    if file_path.startswith("s3://"):
        return "s3"
    if file_path.startswith("quip://"):
        return "quip"

    extension_types = (
        (('.mp3', '.mp4', '.m4a', '.wav', '.flac', '.mov', '.avi'), "multimedia_file"),
        (('.pdf',), "pdf"),
        (('.docx',), "microsoft_word"),
        (('.xlsx', '.xlsm', '.xltx', '.xltm'), "microsoft_excel"),
        (('.pptx',), "microsoft_pp"),
        (('.jpg', '.jpeg', '.png', '.tiff'), "image_file"),
    )
    # Extensions are compared case-insensitively so "REPORT.PDF" is still a PDF.
    lowered = file_path.lower()
    for extensions, input_type in extension_types:
        if lowered.endswith(extensions):
            return input_type

    # Everything else, including .txt and .json, is read as text.
    return "text_or_json"
45
+
46
def construct_chain(input_type, args):
    """Assemble the handler chain for ``input_type`` and return its first handler.

    The chain is built in this order:

    1. the reader/converter handlers specific to ``input_type``;
    2. optionally the PII tokenize handler (``args.anonymize``);
    3. the prompt + Bedrock handlers and/or the Bedrock chat handler,
       ordered according to ``args.chat``;
    4. optionally the PII untokenize handler (when step 2 was added);
    5. optionally the clipboard writer (``CLIPBOARD_COPY`` environment variable).

    Args:
        input_type: A type name as produced by ``determine_input_type``.
        args: Parsed command-line namespace; ``args.anonymize`` and
            ``args.chat`` are read.

    Returns:
        The first handler of the chain.

    Note:
        Prints a message and exits the process with status 1 when
        ``input_type`` is not supported.
    """
    # Handler names, in chain order, for every supported input type.
    stage_names_by_type = {
        "youtube_url": (
            "YouTubeReaderHandler",
            "AmazonS3WriterHandler",
            "AmazonTranscriptionHandler",
            "LocalFileWriterHandler",
        ),
        "multimedia_file": (
            "AmazonS3WriterHandler",
            "AmazonTranscriptionHandler",
            "LocalFileWriterHandler",
        ),
        "multimedia_file_whisper": (
            "OpenAIWhisperTranscriptionHandler",
            "LocalFileWriterHandler",
        ),
        "image_file": (
            "LocalFileReaderHandler",
            "AmazonTextractHandler",
            "LocalFileWriterHandler",
        ),
        "pdf": ("PDFReaderHandler", "LocalFileWriterHandler"),
        "http": ("HTTPHandler", "HTMLCleanerHandler", "LocalFileWriterHandler"),
        "text_or_json": ("LocalFileReaderHandler",),
        "s3": ("AmazonS3ReaderHandler",),
        "quip": ("QuipReaderHandler", "HTMLCleanerHandler", "LocalFileWriterHandler"),
        "microsoft_word": ("MicrosoftWordReaderHandler", "LocalFileWriterHandler"),
        "microsoft_excel": ("MicrosoftExcelReaderHandler", "LocalFileWriterHandler"),
        "microsoft_pp": ("MicrosoftPowerPointReaderHandler", "LocalFileWriterHandler"),
    }

    def link(tail, *handlers):
        # Append each handler after ``tail`` and return the new last link.
        for handler in handlers:
            tail = tail.set_next(handler)
        return tail

    stage_names = stage_names_by_type.get(input_type)
    if stage_names is None:
        # No chain is defined for this type; abort instead of guessing.
        print("Unsupported file type.", input_type)
        sys.exit(1)

    stages = [HandlerFactory.get_handler(name) for name in stage_names]
    chain = stages[0]
    current_handler = link(chain, *stages[1:])

    # Anonymize data?
    # Accept the historic values plus any capitalisation ("True", "TRUE", "t"),
    # mirroring the CLIPBOARD_COPY check below, so that a differently-cased
    # flag value does not silently switch PII tokenisation off.
    anonymize_flag = args.anonymize
    anonymize = (
        anonymize_flag in (True, 'true', '1')
        or str(anonymize_flag).strip().lower() in ('true', '1', 't')
    )
    if anonymize:
        anonymize_handler = HandlerFactory.get_handler("AmazonComprehendPIITokenizeHandler")
        current_handler = link(current_handler, anonymize_handler)

    # Add the prompt and bedrock handlers.
    prompt_handler = HandlerFactory.get_handler("PromptHandler")
    bedrock_handler = HandlerFactory.get_handler("AmazonBedrockHandler")

    # Decide whether summarization and/or chat run, and in what order.
    if args.chat:
        chat_handler = HandlerFactory.get_handler("AmazonBedrockChatHandler")
        print("Enable chat", args)
        if args.chat == 'sum_first':
            current_handler = link(current_handler, prompt_handler, bedrock_handler, chat_handler)
        elif args.chat == 'chat_only':
            current_handler = link(current_handler, chat_handler)
        else:
            # Any other value (e.g. 'chat_first'): chat, then summarize.
            current_handler = link(current_handler, chat_handler, prompt_handler, bedrock_handler)
    else:
        current_handler = link(current_handler, prompt_handler, bedrock_handler)

    # Finally, if we have tokenized the content, let's untokenize
    if anonymize:
        unanonymize_handler = HandlerFactory.get_handler("AmazonComprehendPIIUntokenizeHandler")
        current_handler = link(current_handler, unanonymize_handler)

    # Copy to clipboard?
    clipboard = os.getenv('CLIPBOARD_COPY', 'false').lower() in ('true', '1', 't')
    if clipboard:
        clipboard_handler = HandlerFactory.get_handler("ClipboardWriterHandler")
        current_handler = link(current_handler, clipboard_handler)
        print("\n\n ================================================\n The summary will be copied to your clipboard.\n ================================================\n")

    return chain
170
+
171
def process_file(file_path, args):
    """Run a single path or URL through its handler chain.

    Args:
        file_path: Local path or URL to process.
        args: Parsed command-line namespace; ``args.prompt_file_name`` is
            placed in the request and the namespace is passed on to
            ``construct_chain``.

    Returns:
        Whatever the first handler's ``handle`` call returns.
    """
    print(f"Processing: {file_path}")
    input_type = determine_input_type(file_path)
    head_handler = construct_chain(input_type, args)

    # The output file name carries the source name and a timestamp.
    timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    storage_dir = os.getenv('DIR_STORAGE', './downloads')
    source_name = os.path.basename(file_path)
    output_file = f"{storage_dir}/output_{source_name}_{timestamp}.txt"

    return head_handler.handle({
        "type": input_type,
        "path": file_path,
        "prompt_file_name": args.prompt_file_name,
        "text": "",
        "write_file_path": output_file,
        "extract_media": True,
    })
194
+
195
+
196
def main():
    """Parse the command line, then process one file/URL or every file in a folder.

    When ``path`` is a directory, each regular file directly inside it is
    processed on a thread pool whose size comes from the
    ``MAX_PARALLEL_PROCESSES`` config value. Every result is printed: its
    ``"text"`` entry when present and non-empty, otherwise the whole result
    as JSON.
    """
    # Initialize the argument parser
    parser = argparse.ArgumentParser(description='Process input files or URLs.')

    # Required positional argument for the file/URL to process
    parser.add_argument('path', type=str, help='The path to the file, folder or URL to be processed.')

    # Optional positional argument for the prompt file name, with a default value
    parser.add_argument('prompt_file_name', nargs='?', default='default_prompt', help='The name of the prompt file. Defaults to "default_prompt" if not specified.')

    # Optional flag to specify the use of a interactive chat handler
    parser.add_argument("--chat", type=str, default=None, choices=[None, 'sum_first', 'chat_first', 'chat_only'], help="Choose 'sum_first', 'chat_first' to summarize before chat or direct chat interaction with your original text. Select 'chat_only' if you only want to query your original text")

    # Optional flag to turn off anonymization
    parser.add_argument("--anonymize", type=str, default=True, help="Anonymize customer names before sending to the model. By default this is set to true.")

    # Optional argument for config file path
    parser.add_argument("--config", type=str, help="Path to the config file. If not provided, will search for config.yaml in the current directory and its parents.")

    # Parse the command-line arguments
    args = parser.parse_args()

    # Load configuration
    from awschain.utils.config_loader import ConfigLoader
    ConfigLoader.load_config(args.config)

    # Handler discovery
    HandlerFactory.discover_handlers()

    if os.path.isdir(args.path):
        max_processes = int(ConfigLoader.get_config('MAX_PARALLEL_PROCESSES', 1))
        candidates = (os.path.join(args.path, entry) for entry in os.listdir(args.path))
        file_paths = [candidate for candidate in candidates if os.path.isfile(candidate)]
        with ThreadPoolExecutor(max_workers=max_processes) as executor:
            futures = [executor.submit(process_file, file_path, args) for file_path in file_paths]
            # Keep every result, not just the last one to complete; an empty
            # folder simply yields an empty list instead of an unbound name.
            results = [future.result() for future in as_completed(futures)]
    else:
        results = [process_file(args.path, args)]

    for result in results:
        # A handler may return something other than a dict (e.g. None);
        # only dicts are probed for a "text" entry.
        text = result.get("text", None) if isinstance(result, dict) else None
        if text:
            print(text)
        else:
            print(json.dumps(result, indent=2))
238
+
239
+ if __name__ == "__main__":
240
+ main()
@@ -0,0 +1,5 @@
1
+ from .handler_factory import HandlerFactory
2
+ from .abstract_handler import AbstractHandler
3
+ from .base_handler import BaseHandler
4
+
5
+ __all__ = ['HandlerFactory', 'AbstractHandler', 'BaseHandler']
@@ -0,0 +1,26 @@
1
+ from __future__ import annotations
2
+ from abc import abstractmethod
3
+ from typing import Any
4
+ from .base_handler import BaseHandler
5
+
6
class AbstractHandler(BaseHandler):
    """
    The default chaining behavior can be implemented inside a base handler
    class.

    ``set_next`` stores the successor and ``handle`` forwards the request to
    it. ``handle`` is still declared abstract, so subclasses must override it;
    they can call ``super().handle(request)`` to pass the request along.
    """

    # Successor in the chain; None while this handler is the last link.
    _next_handler: BaseHandler | None = None

    def set_next(self, handler: BaseHandler) -> BaseHandler:
        """Register ``handler`` as the next link and return it.

        Returning the handler passed in lets calls be chained:
        local_file_handler.set_next(prompt_handler).set_next(summarization_handler)
        """
        self._next_handler = handler
        return handler

    @abstractmethod
    def handle(self, request: dict) -> dict:
        """Forward ``request`` to the next handler, if there is one.

        Returns:
            The next handler's result, or ``request`` itself when no
            successor has been set.
        """
        if self._next_handler:
            return self._next_handler.handle(request)

        return request
@@ -0,0 +1,17 @@
1
+ from __future__ import annotations
2
+ from abc import ABC, abstractmethod
3
+ from typing import Optional
4
+
5
class BaseHandler(ABC):
    """
    The Handler interface declares a method for building the chain of handlers.
    It also declares a method for executing a request.

    Both methods are abstract, so this class cannot be instantiated directly.
    """

    @abstractmethod
    def set_next(self, handler: BaseHandler) -> BaseHandler:
        """Register ``handler`` as the next handler in the chain."""
        pass

    @abstractmethod
    def handle(self, request: dict) -> Optional[dict]:
        """Process ``request``; implementations return a dict or None."""
        pass