pyproxytools 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ """
2
+ pyproxy.utils.http_req.py
3
+
4
+ HTTP request parsing utilities for pyproxy.
5
+ """
6
+
7
+
8
def extract_headers(request_str):
    """
    Extracts the HTTP headers from a raw HTTP request string.

    The first line (the request line) is skipped. Parsing stops at the first
    blank line, which terminates the header section, so message-body content
    is never mistaken for headers. Lines that do not contain a ":" separator
    are ignored instead of raising ``ValueError``.

    Args:
        request_str (str): The full HTTP request as a decoded string.

    Returns:
        dict: A dictionary containing the HTTP header fields as key-value pairs.
            If a field name is repeated, the last occurrence wins.
    """
    headers = {}
    for line in request_str.split("\n")[1:]:
        # strip() also removes the "\r" left behind by CRLF line endings.
        if not line.strip():
            break  # blank line: end of the header section, body follows
        key, separator, value = line.partition(":")
        if not separator:
            continue  # malformed header line without a colon
        headers[key.strip()] = value.strip()
    return headers
25
+
26
+
27
def parse_url(url):
    """
    Parses the URL to extract the host and port for connecting to the target server.

    Accepted forms include absolute URLs ("http://host:8080/path"),
    protocol-relative URLs ("//host/path") and bare authorities ("host:443").
    The authority ends at the first "/", "?" or "#". Optional
    "user:password@" credentials are discarded, and the brackets around an
    IPv6 literal ("[::1]:8080") are removed from the returned host.

    Args:
        url (str): The URL to be parsed.

    Returns:
        tuple: The server host (str) and port (int). The port defaults to 80
            when the URL does not specify one or specifies an empty one.

    Raises:
        ValueError: If the port component is present but is not an integer.
    """
    # Only treat "//" as the scheme separator when it really follows a scheme
    # ("http:") or starts the string; a "//" inside a path or query must not
    # cause everything before it to be thrown away.
    prefix, slashes, remainder = url.partition("//")
    if slashes and (
        not prefix or (prefix.endswith(":") and not set(prefix) & set("/?#"))
    ):
        url = remainder

    # The authority stops at the first path, query or fragment delimiter.
    cut_points = [pos for pos in map(url.find, "/?#") if pos != -1]
    authority = url[: min(cut_points)] if cut_points else url
    # Drop optional credentials so their ":" is not mistaken for the port.
    authority = authority.rpartition("@")[2]

    if authority.startswith("[") and "]" in authority:
        # Bracketed IPv6 literal: the colons inside the brackets belong to
        # the address, only a ":" after the closing bracket introduces a port.
        closing = authority.index("]")
        server_host = authority[1:closing]
        trailer = authority[closing + 1 :]
        port_text = trailer[1:] if trailer.startswith(":") else ""
    else:
        server_host, _, port_text = authority.partition(":")

    # An absent or empty port ("host:") falls back to the HTTP default.
    server_port = int(port_text) if port_text else 80
    return server_host, server_port
@@ -0,0 +1,46 @@
1
+ """
2
+ pyproxy.utils.logger.py
3
+
4
+ This module contains functions to configure and return loggers for both console and file output.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+
10
+
11
def configure_console_logger() -> logging.Logger:
    """
    Configures and returns a logger that outputs log messages to the console.

    The logger is named "ConsoleLogger" and set to the INFO level. Because
    ``logging.getLogger`` returns the same object for the same name, the
    console handler is attached only once; calling this function repeatedly
    does not produce duplicated log lines.

    Returns:
        logging.Logger: A logger instance that writes logs to the console.
    """
    console_logger = logging.getLogger("ConsoleLogger")
    console_logger.setLevel(logging.INFO)

    # Exact type check on purpose: FileHandler subclasses StreamHandler and
    # must not be counted as an existing console handler.
    already_configured = any(
        type(handler) is logging.StreamHandler
        for handler in console_logger.handlers
    )
    if not already_configured:
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(
            logging.Formatter(
                "%(asctime)s - %(message)s", datefmt="%d/%m/%Y %H:%M:%S"
            )
        )
        console_logger.addHandler(console_handler)
    return console_logger
27
+
28
+
29
def configure_file_logger(log_path: str, name: str) -> logging.Logger:
    """
    Configures and returns a logger that writes log messages to a specified file.

    The parent directory of ``log_path`` is created when it is missing. A
    bare filename (no directory component) is accepted and resolved against
    the current working directory. A file handler for a given path is
    attached to the logger only once, so repeated calls with the same
    arguments do not duplicate log lines.

    Args:
        log_path (str): The path where the log file will be created or appended to.
        name (str): Logger's name.

    Returns:
        logging.Logger: A logger instance that writes to the specified log file.
    """
    log_dir = os.path.dirname(log_path)
    # dirname() is "" for a bare filename and os.makedirs("") would raise.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)

    file_logger = logging.getLogger(name)
    file_logger.setLevel(logging.INFO)

    # FileHandler stores the absolute path of its file in ``baseFilename``.
    target = os.path.abspath(log_path)
    already_attached = any(
        getattr(handler, "baseFilename", None) == target
        for handler in file_logger.handlers
    )
    if not already_attached:
        file_handler = logging.FileHandler(log_path)
        file_handler.setFormatter(logging.Formatter("%(asctime)s - %(message)s"))
        file_logger.addHandler(file_handler)
    return file_logger
File without changes
@@ -0,0 +1,130 @@
1
+ Metadata-Version: 2.4
2
+ Name: pyproxytools
3
+ Version: 0.3.2
4
+ Summary: Lightweight and fast python web proxy
5
+ Author: 6C656C65
6
+ License-Expression: MIT
7
+ Project-URL: Documentation, https://github.com/6C656C65/pyproxy/wiki
8
+ Project-URL: Issue tracker, https://github.com/6C656C65/pyproxy/issues
9
+ Classifier: Development Status :: 5 - Production/Stable
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Natural Language :: English
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Internet
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: Utilities
22
+ Classifier: Typing :: Typed
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: rich-argparse>=1.7.0
26
+ Requires-Dist: pyOpenSSL>=25.0.0
27
+ Requires-Dist: requests>=2.31.0
28
+ Requires-Dist: Flask>=3.1.0
29
+ Requires-Dist: Flask-HTTPAuth>=4.8.0
30
+ Requires-Dist: psutil>=5.9.8
31
+ Dynamic: license-file
32
+
33
+ <div align="center">
34
+ <h1>pyproxy</h1>
35
+ </div>
36
+
37
+
38
+ **pyproxy** is a lightweight, fast, and customizable Python-based web proxy server designed to handle both HTTP and HTTPS traffic efficiently. It can be used for various purposes, including web scraping, traffic monitoring, and content filtering.
39
+
40
+ <p align="center">
41
+ <img src="https://img.shields.io/github/license/6C656C65/pyproxy?style=for-the-badge">
42
+ <img src="https://img.shields.io/github/issues/6C656C65/pyproxy?style=for-the-badge">
43
+ <img src="https://img.shields.io/github/issues-closed/6C656C65/pyproxy?style=for-the-badge">
44
+ <br>
45
+ <img src="https://img.shields.io/github/forks/6C656C65/pyproxy?style=for-the-badge">
46
+ <img src="https://img.shields.io/github/stars/6C656C65/pyproxy?style=for-the-badge">
47
+ <img src="https://img.shields.io/github/commit-activity/w/6C656C65/pyproxy?style=for-the-badge">
48
+ <img src="https://img.shields.io/github/contributors/6C656C65/pyproxy?style=for-the-badge">
49
+ <br>
50
+ <img src="https://img.shields.io/github/actions/workflow/status/6C656C65/pyproxy/code-scan.yml?label=Scan&style=for-the-badge">
51
+ <img src="https://img.shields.io/github/actions/workflow/status/6C656C65/pyproxy/unittest.yml?label=Tests&style=for-the-badge">
52
+ <img src="https://img.shields.io/github/actions/workflow/status/6C656C65/pyproxy/docker-images.yml?label=Delivery&style=for-the-badge">
53
+ </p>
54
+
55
+ ---
56
+
57
+ ## ⚡ **Features**
58
+
59
+ | Feature | Supported |
60
+ |----------------------------------------------|-----------|
61
+ | HTTP & HTTPS | ✅ |
62
+ | Web request logging | ✅ |
63
+ | Domain & URL blacklist | ✅ |
64
+ | SSL inspection | ✅ |
65
+ | Custom 403 Forbidden page | ✅ |
66
+ | Remote (HTTP) blacklist support | ✅ |
67
+ | Shortcut support | ✅ |
68
+ | Disable inspection for banking websites | ✅ |
69
+ | Custom headers | ✅ |
70
+ | Web interface monitoring | ✅ |
71
+ | Lightweight Docker image | ✅ |
72
+ | Proxy chaining (multi-proxy forwarding) | ✅ |
73
+ | IP whitelist with subnet support | ✅ |
74
+
75
+ ## 📦 **Installation**
76
+
77
+ ### Install from source
78
+ ```bash
79
+ git clone https://github.com/6C656C65/pyproxy.git
80
+ cd pyproxy
81
+ pip install -r requirements.txt
82
+ ```
83
+
84
+ ### Install with Docker
85
+ ```bash
86
+ docker pull ghcr.io/6c656c65/pyproxy:latest
87
+ docker run -d ghcr.io/6c656c65/pyproxy:latest
88
+ ```
89
+ You can use slim images by adding `-slim` to the end of the tags.
90
+
91
+ ## 🚀 **Usage**
92
+
93
+ ### Start the proxy
94
+ ```bash
95
+ python3 -m pyproxy.pyproxy
96
+ ```
97
+ The proxy will be available at: `0.0.0.0:8080`.
98
+ The access log will be available at `./logs/access.log`.
99
+
100
+ ## 📚 **Documentation**
101
+ If you encounter any problems, or if you want to use the program in a particular way, I advise you to read the [documentation](https://github.com/6C656C65/pyproxy/wiki).
102
+
103
+ ## 🔧 **To do**
104
+
105
+ - Support content analysis
106
+ - Caching of latest and most searched pages
107
+
108
+ ## 🏎️ **Benchmark**
109
+
110
+ If you're interested in benchmarking the performance of the proxy or comparing request times with and without a proxy, please refer to the [Benchmark README](benchmark/README.md) for detailed instructions on how to run the benchmarking tests and generate reports.
111
+
112
+ ## 📄 **License**
113
+
114
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
115
+
116
+ ## 🤝 **Contributing**
117
+
118
+ Contributions are welcome and appreciated! If you'd like to improve this project, feel free to fork the repository and submit a pull request. Whether it's fixing bugs, adding new features, improving documentation, or suggesting enhancements, every bit helps. Please make sure to follow the coding standards and test your changes before submitting. Let's build something great together!
119
+
120
+ ## 📦 Deployment with Ansible
121
+
122
+ If you want to deploy **pyproxy** automatically to remote servers (via source or Docker), an official [Ansible role](https://github.com/6C656C65/pyproxy_ansible) is available:
123
+
124
+ * 🔧 Install from source or run as a Docker container
125
+ * 📁 Supports customization of ports, versions, and paths
126
+ * 🚀 Easily integrable into your infrastructure or CI/CD pipelines
127
+
128
+ 👉 Check out the [ansible-role-pyproxy](https://github.com/6C656C65/pyproxy_ansible) repository for more details and usage instructions.
129
+
130
+ ---
@@ -0,0 +1,40 @@
1
+ benchmark/benchmark.py,sha256=E_LLmJ1prZ_QghkNUdUOfe87tGxOxqdn-z49LFtniQY,5427
2
+ benchmark/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ benchmark/utils/html.py,sha256=PpEv92f0b2rhK_Pcu2019Vkn7Q0XMka_ai4PQic7__s,5460
4
+ benchmark/utils/req.py,sha256=85b59kCtY6dXigahXcNmZXxbOshvNuBYxRTVJGQKPNQ,1200
5
+ pyproxy/__init__.py,sha256=CMJC6v1rgmB-ZhMt-5gbBt0wMc7MS4pkhSzF3ZuMM-U,270
6
+ pyproxy/pyproxy.py,sha256=sJGxPIYnnn-hR9COvcBpjED8rruCYfa30PHfBPwVmus,4124
7
+ pyproxy/server.py,sha256=xA-RHd6FNWJB4Jr1pZzJbPcmC4ioezWqCJcf_jMm6XE,12437
8
+ pyproxy/handlers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ pyproxy/handlers/client.py,sha256=PYttEu7Y0quDw1L0vHlkUmsJHVTzimiF5Qw82TzjDTo,5148
10
+ pyproxy/handlers/http.py,sha256=egz88l7Lnr_w1HYPKQuK727rALYHPpy6ilgZFKZBIJo,7623
11
+ pyproxy/handlers/https.py,sha256=ysv1uYrJLgJMyMr7luqgCKPXGhUq1hpDESVb3cHp9aE,12879
12
+ pyproxy/modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ pyproxy/modules/cancel_inspect.py,sha256=OyyOLsgyGfesIV8QlhtB_z1Gw0aZlQI1opLjF_konKI,2615
14
+ pyproxy/modules/custom_header.py,sha256=tjqvZ1FPnEIbQRhvy1YYANzQyTbkn0zKOK_83h6Sf6E,2513
15
+ pyproxy/modules/filter.py,sha256=2S_tQcyfBB-O741Q2zTnrg9IudIkKjzHIbsLMYU6j20,5111
16
+ pyproxy/modules/shortcuts.py,sha256=7qw0veqn5WCy0iOsNlg-p6hhUyGnTnz6ZIORfPvZvtg,2557
17
+ pyproxy/monitoring/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
18
+ pyproxy/monitoring/web.py,sha256=DTszt48G7sXpCqCetHAb2EI-1wauRxG-Og1sSRUEiug,9966
19
+ pyproxy/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ pyproxy/utils/args.py,sha256=8LNJa5hwtQrVo3V3bT_eqzYg43_8I78X885zVEV-T1M,5884
21
+ pyproxy/utils/config.py,sha256=amTSmoD5WH0JSoArLvMiG6BFvN_c5bgXbABLbPWAEPk,3283
22
+ pyproxy/utils/crypto.py,sha256=eEEL9sujn4P4gY5_VIbarXyGk_wESjUov2l8-UN6Bo0,1672
23
+ pyproxy/utils/http_req.py,sha256=fA-L2RJwdMHXAcVNEvD3I_r1jpPZVx74S8ZSeVzXJUA,1301
24
+ pyproxy/utils/logger.py,sha256=VHQa4GmcmH_Dp2uBq2_kd8xOnHmSCxOtsGkNTthCBDQ,1500
25
+ pyproxy/utils/version.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
+ pyproxytools-0.3.2.dist-info/licenses/LICENSE,sha256=TDdWrw5utS7dNRdzxWj_1RM54vnlX0lYzL-GS82pIhA,1065
27
+ tests/modules/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
+ tests/modules/test_cancel_inspect.py,sha256=JRIf8TvDS8NCFY_HCX7fuNbhfHDqnVCoT2ZuvBNuZho,2123
29
+ tests/modules/test_custom_header.py,sha256=CmBJIjIvAgTfEdeNivrxxDbWrOCM4n9J8PNndhKER50,2272
30
+ tests/modules/test_filter.py,sha256=6sNG1IYtmsxnIUvhobRQAVFPN-O-CTWEiMuRsX2BqZ8,7078
31
+ tests/modules/test_shortcuts.py,sha256=Ly717FJX-Xc2DYzLyoKvJkqr7PDpPRAgt2yyPeR2-So,4513
32
+ tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
+ tests/utils/test_crypto.py,sha256=7WKd3Ots9QjcWKj_AM93kNNB4ODZGwZ8pxryiQT5w94,3668
34
+ tests/utils/test_http_req.py,sha256=E0hGMHmhAk0nar70sL9XvGY2H2U_t7f8rdVALwXFurI,1971
35
+ tests/utils/test_logger.py,sha256=OkH68HzIJuIMPQN-webzdGjNUMeg6Qu-mWVk-hhzrA8,2139
36
+ pyproxytools-0.3.2.dist-info/METADATA,sha256=r_gYxlruCMLllAnHwDrRD_1irryodL_Yw62inwf1G6U,5729
37
+ pyproxytools-0.3.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
38
+ pyproxytools-0.3.2.dist-info/entry_points.txt,sha256=qr3JIpFvNnWEJAkr6krRZDC2paiG_P_VlIQ6QmTHZ88,49
39
+ pyproxytools-0.3.2.dist-info/top_level.txt,sha256=QSDF6at2BiAjnYt-WyhDBCDUzlzn_lzOAwQ4v3H5ZVE,24
40
+ pyproxytools-0.3.2.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ pyproxy = pyproxy.pyproxy:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 6C656C65
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,3 @@
1
+ benchmark
2
+ pyproxy
3
+ tests
File without changes
@@ -0,0 +1,67 @@
1
+ """
2
+ test_cancel_inspect.py
3
+
4
+ This test module verifies the functionality of the `cancel_inspect.py` module.
5
+
6
+ Tests:
7
+ - test_load_cancel_inspect: Ensures entries are correctly loaded from a test file.
8
+ - test_cancel_inspect_process: Validates the multiprocessing behavior of
9
+ checking cancel inspection entries.
10
+ """
11
+
12
+ import unittest
13
+ import tempfile
14
+ import os
15
+ import multiprocessing
16
+ import time
17
+
18
+ from pyproxy.modules.cancel_inspect import load_cancel_inspect, cancel_inspect_process
19
+
20
+
21
class TestCancelInspect(unittest.TestCase):
    """Unit tests for the cancel_inspect.py module."""

    def setUp(self):
        """Set up a temporary file with test data for cancel inspection."""
        # delete=False keeps the file on disk after the handle is closed so
        # the code under test can reopen it by path; tearDown removes it.
        with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
            temp_file.write("http://example.com/1\nhttp://example.com/2\n")
        self.temp_file = temp_file
        self.path = temp_file.name

    def tearDown(self):
        """Remove the temporary file after tests complete."""
        os.unlink(self.path)

    def test_load_cancel_inspect(self):
        """Test that the cancel inspection file is correctly loaded into a list."""
        entries = load_cancel_inspect(self.path)
        self.assertEqual(len(entries), 2)
        # Entries are expected to keep their trailing newline.
        self.assertIn("http://example.com/1\n", entries)
        self.assertIn("http://example.com/2\n", entries)

    def test_cancel_inspect_process(self):
        """Test that the cancel inspection process returns the correct match result."""
        queue = multiprocessing.Queue()
        result_queue = multiprocessing.Queue()

        process = multiprocessing.Process(
            target=cancel_inspect_process, args=(queue, result_queue, self.path)
        )
        process.start()
        # Always stop the worker, even when an assertion fails or a
        # result_queue.get() times out; otherwise the child process leaks.
        try:
            # NOTE(review): presumably gives the worker time to start and
            # load its file before the first request - confirm it is needed.
            time.sleep(1)

            queue.put("http://example.com/1\n")
            result = result_queue.get(timeout=3)
            self.assertTrue(result)

            queue.put("http://nonexistent.com/\n")
            result = result_queue.get(timeout=3)
            self.assertFalse(result)
        finally:
            process.terminate()
            process.join()
64
+
65
+
66
+ # Allow running this test module directly as a script.
+ if __name__ == "__main__":
67
+ unittest.main()
@@ -0,0 +1,70 @@
1
+ """
2
+ test_custom_header.py
3
+
4
+ This test module verifies the functionality of the `custom_header.py` module.
5
+
6
+ Tests:
7
+ - test_load_custom_header: Ensures JSON headers are correctly loaded from a test file.
8
+ - test_custom_header_process: Validates the multiprocessing behavior of resolving custom headers.
9
+ """
10
+
11
+ import unittest
12
+ import tempfile
13
+ import os
14
+ import multiprocessing
15
+ import time
16
+ import json
17
+
18
+ from pyproxy.modules.custom_header import load_custom_header, custom_header_process
19
+
20
+
21
class TestCustomHeader(unittest.TestCase):
    """Unit tests for the custom_header.py module."""

    def setUp(self):
        """Set up a temporary JSON file with test data for custom headers."""
        self.sample_data = {
            "http://example.com": {"X-Test-Header": "123", "X-Another": "456"},
            "http://another.com": {"X-Custom": "abc"},
        }
        # delete=False keeps the file on disk after the handle is closed so
        # the code under test can reopen it by path; tearDown removes it.
        with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
            json.dump(self.sample_data, temp_file)
        self.temp_file = temp_file
        self.path = temp_file.name

    def tearDown(self):
        """Remove the temporary file after tests complete."""
        os.unlink(self.path)

    def test_load_custom_header(self):
        """Test that the custom header JSON file is correctly loaded into a dictionary."""
        headers = load_custom_header(self.path)
        self.assertEqual(headers["http://example.com"]["X-Test-Header"], "123")
        self.assertIn("http://another.com", headers)

    def test_custom_header_process(self):
        """Test that the custom header process returns the correct header dictionary."""
        queue = multiprocessing.Queue()
        result_queue = multiprocessing.Queue()

        process = multiprocessing.Process(
            target=custom_header_process, args=(queue, result_queue, self.path)
        )
        process.start()
        # Always stop the worker, even when an assertion fails or a
        # result_queue.get() times out; otherwise the child process leaks.
        try:
            # NOTE(review): presumably gives the worker time to start and
            # load its file before the first request - confirm it is needed.
            time.sleep(1)

            queue.put("http://example.com")
            result = result_queue.get(timeout=3)
            self.assertEqual(result, {"X-Test-Header": "123", "X-Another": "456"})

            # An unknown URL is expected to yield an empty header mapping.
            queue.put("http://nonexistent.com")
            result = result_queue.get(timeout=3)
            self.assertEqual(result, {})
        finally:
            process.terminate()
            process.join()
67
+
68
+
69
+ # Allow running this test module directly as a script.
+ if __name__ == "__main__":
70
+ unittest.main()
@@ -0,0 +1,185 @@
1
+ """
2
+ test_filter.py
3
+
4
+ This module contains unit tests for the `filter.py` module.
5
+ It verifies the correct functionality of loading blacklists and filtering domains/URLs.
6
+
7
+ Tested Functions:
8
+ - load_blacklist: Ensures that the blacklist is correctly loaded from the file.
9
+ - filter_process: Ensures that domains/URLs are correctly filtered based on the blacklist.
10
+
11
+ Test Cases:
12
+ - TestLoadBlacklist: Checks the correct loading of blocked sites and URLs from the file.
13
+ - TestFilterProcess: Verifies that domains/URLs are correctly identified as blocked or allowed.
14
+ - TestLoadBlacklistFileNotFound: Verifies that a FileNotFoundError is
15
+ raised when the blacklist file is missing.
16
+ - TestLoadBlacklistHttpError: Verifies that an HTTP error is handled
17
+ correctly when loading blacklists.
18
+ - TestLoadBlacklistEmptyFile: Verifies that an empty file returns empty
19
+ sets for blocked sites and URLs.
20
+ - TestFilterProcessWithPathAndPort: Verifies that URLs with paths or ports are correctly filtered.
21
+ """
22
+
23
+ import unittest
24
+ import multiprocessing
25
+ from unittest.mock import patch, mock_open
26
+ import requests
27
+ from pyproxy.modules.filter import load_blacklist, filter_process
28
+
29
+
30
class TestFilter(unittest.TestCase):
    """
    Test suite for the filter module.
    """

    def setUp(self):
        """Sets up the common resources for tests."""
        self.queue = multiprocessing.Queue()
        self.result_queue = multiprocessing.Queue()

    def tearDown(self):
        """Cleans up after each test."""
        # Drain anything a failed test left behind in either queue.
        for pending in (self.queue, self.result_queue):
            while not pending.empty():
                pending.get_nowait()

    def test_load_blacklist(self):
        """Tests if the blacklist is correctly loaded from the file."""
        with patch(
            "builtins.open",
            new_callable=mock_open,
            read_data="blocked.com\nallowed.com/blocked",
        ):
            blocked_sites, blocked_urls = load_blacklist(
                "blocked_sites.txt", "blocked_urls.txt", "local"
            )
            # mock_open serves the same read_data for every open() call, so
            # the URL entry is visible through the sites file as well.
            self.assertIn("blocked.com", blocked_sites)
            self.assertIn("allowed.com/blocked", blocked_sites)
            self.assertIsInstance(blocked_sites, set)
            self.assertIsInstance(blocked_urls, set)

    @patch("builtins.open", side_effect=FileNotFoundError("File not found"))
    def test_load_blacklist_file_not_found(self, _mock_file):
        """Tests that a FileNotFoundError is raised when the blacklist file is missing."""
        with self.assertRaises(FileNotFoundError):
            load_blacklist("invalid_file.txt", "blocked_urls.txt", "local")

    @patch(
        "requests.get",
        side_effect=requests.exceptions.RequestException("Failed to load"),
    )
    def test_load_blacklist_http_error(self, _mock_request):
        """Tests that an HTTP error is handled correctly when loading blacklists."""
        with self.assertRaises(requests.exceptions.RequestException):
            load_blacklist(
                "http://example.com/blocked_sites",
                "http://example.com/blocked_urls",
                "http",
            )

    @patch("builtins.open", new_callable=mock_open, read_data="")
    def test_load_blacklist_empty_file(self, _mock_file):
        """Tests that an empty file returns empty sets for blocked sites and URLs."""
        blocked_sites, blocked_urls = load_blacklist(
            "empty_sites.txt", "empty_urls.txt", "local"
        )
        self.assertEqual(len(blocked_sites), 0)
        self.assertEqual(len(blocked_urls), 0)

    def _test_filter_process_helper(
        self,
        input_urls,
        expected_results,
        patch_data="blocked.com\nallowed.com/blocked",
    ):
        """
        Helper method to test filter_process with different inputs.

        Starts filter_process in a child process, feeds it ``input_urls`` and
        compares the results it emits, in order, with ``expected_results``.
        ``patch_data`` is the content served for every mocked file open.
        """
        # NOTE(review): the patch on builtins.open only reaches the child
        # process when the multiprocessing start method is "fork"; under
        # "spawn" the child would open the real files - confirm the
        # platforms these tests are expected to run on.
        with patch("builtins.open", new_callable=mock_open, read_data=patch_data):
            process = multiprocessing.Process(
                target=filter_process,
                args=(
                    self.queue,
                    self.result_queue,
                    "local",
                    "blocked_sites.txt",
                    "blocked_urls.txt",
                ),
            )
            process.start()
            # Always stop the worker, even when the comparison fails or a
            # result_queue.get() times out; otherwise the child leaks.
            try:
                for url in input_urls:
                    self.queue.put(url)

                results = [
                    self.result_queue.get(timeout=2) for _ in expected_results
                ]

                self.assertEqual(results, expected_results)
            finally:
                process.terminate()
                process.join()

    def test_filter_process(self):
        """Tests if domains/URLs are correctly identified as blocked or allowed."""
        input_urls = [
            "http://blocked.com/",
            "http://allowed.com/",
            "http://allowed.com/blocked",
            "http://allowed.com/allowed",
        ]
        expected_results = [
            ("blocked.com", "Blocked"),
            ("allowed.com", "Allowed"),
            ("allowed.com/blocked", "Blocked"),
            ("allowed.com", "Allowed"),
        ]
        self._test_filter_process_helper(input_urls, expected_results)

    def test_filter_process_with_query_string(self):
        """Tests if URLs with query strings are correctly filtered."""
        input_urls = [
            "http://blocked.com?tracking=123",
            "http://example.com/secret?auth=false",
            "http://safe.com/page?debug=true",
        ]
        expected_results = [
            ("blocked.com", "Blocked"),
            ("example.com/secret", "Blocked"),
            ("safe.com", "Allowed"),
        ]
        self._test_filter_process_helper(
            input_urls, expected_results, patch_data="blocked.com\nexample.com/secret"
        )

    def test_filter_process_subdomain_not_blocked(self):
        """
        Tests if subdomains are correctly handled and not blocked if the main domain is not blocked.
        """
        input_urls = ["http://sub.blocked.com/"]
        expected_results = [("sub.blocked.com", "Allowed")]
        self._test_filter_process_helper(
            input_urls, expected_results, patch_data="blocked.com\n"
        )

    def test_filter_process_special_characters(self):
        """Tests if URLs with special characters are correctly handled."""
        input_urls = ["http://weird-site.com/"]
        expected_results = [("weird-site.com", "Blocked")]
        self._test_filter_process_helper(
            input_urls, expected_results, patch_data="weird-site.com\n"
        )

    def test_filter_process_with_path_and_port(self):
        """Tests if URLs with paths and ports are correctly filtered."""
        input_urls = [
            "http://blocked.com:8080/path/to/resource",
            "http://allowed.com/blocked/resource",
        ]
        expected_results = [
            ("blocked.com", "Blocked"),
            ("allowed.com/blocked/resource", "Blocked"),
        ]
        self._test_filter_process_helper(input_urls, expected_results)
182
+
183
+
184
+ # Allow running this test module directly as a script.
+ if __name__ == "__main__":
185
+ unittest.main()