doculift 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ include README.md
2
+ include requirements.txt
3
+ recursive-include src/doculift/templates *
4
+ recursive-include src/doculift/static *
5
+ global-exclude *.py[cod]
6
+ global-exclude __pycache__
@@ -0,0 +1,229 @@
1
+ Metadata-Version: 2.4
2
+ Name: doculift
3
+ Version: 0.1.0
4
+ Summary: A powerful CLI & web scraper that lifts documentation for Large Language Models.
5
+ Author: M.J. Shetty
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/mjshetty/doculift
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Topic :: Utilities
13
+ Requires-Python: >=3.10
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: flask>=3.0.0
16
+ Requires-Dist: requests
17
+ Requires-Dist: beautifulsoup4
18
+ Requires-Dist: playwright
19
+ Requires-Dist: click
20
+ Requires-Dist: rich
21
+ Provides-Extra: dev
22
+ Requires-Dist: black; extra == "dev"
23
+ Requires-Dist: flake8; extra == "dev"
24
+ Requires-Dist: bandit; extra == "dev"
25
+ Requires-Dist: build; extra == "dev"
26
+ Requires-Dist: twine; extra == "dev"
27
+
28
+ # DocuLift
29
+
30
+ **DocuLift** is a web scraping tool that lifts documentation websites into clean, aggregated files optimized for feeding into Large Language Models like Google NotebookLM, Claude, or ChatGPT.
31
+
32
+ It handles dynamic Single Page Applications (SPAs), respects site structure, and produces output in two modes: full content extraction or URL-only extraction.
33
+
34
+ ---
35
+
36
+ ## Features
37
+
38
+ - **Two Extract Modes** — choose between extracting full page content or just collecting URLs (see [When to Use Each Mode](#when-to-use-each-mode))
39
+ - **Dynamic Content Scraping** — uses Playwright (headless Chromium) to render JavaScript-heavy sites (React, Vue, etc.) before extraction
40
+ - **Smart Scoping**:
41
+ - **Section Only** — stays within the folder boundary of the starting URL (e.g. starting at `.../docs/agents/overview` scrapes everything under `.../docs/agents/`)
42
+ - **Entire Domain** — crawls all pages under the target domain
43
+ - **Intelligent Aggregation** — combines multiple pages into single files, auto-splits at ~500KB (NotebookLM's per-file limit), generates meaningful filenames
44
+ - **Multi-URL Support** — submit multiple starting URLs in one job; each is crawled independently and produces its own output file(s)
45
+ - **Per-URL stats** — on completion, the UI shows how many pages or URLs were collected per starting URL
46
+ - **Clean Extraction** — removes navigation, footers, sidebars, ads, and scripts; focuses on main content
47
+
48
+ ---
49
+
50
+ ## When to Use Each Mode
51
+
52
+ ### Extract Content
53
+ Crawls each page and converts its content to Markdown (or text/CSV). Use this when you want to feed documentation directly into an LLM as context.
54
+
55
+ - **Best for**: NotebookLM, Claude Projects, ChatGPT — any tool that accepts uploaded documents
56
+ - **Output**: One or more `.md` files per starting URL, split at ~500KB
57
+ - **Typical workflow**: Extract content → upload files to NotebookLM → ask questions
58
+
59
+ ### Extract URLs Only
60
+ Crawls the site and collects every discovered URL within scope, writing them to a plain `.txt` file — one URL per line, no other content.
61
+
62
+ **Use this when NotebookLM's URL limit is the bottleneck.**
63
+
64
+ NotebookLM supports adding web URLs as sources, but has a cap on how many you can add per notebook. When a documentation section has hundreds of pages, you'll hit that limit quickly. The recommended three-step workflow is:
65
+
66
+ 1. **Run "Extract URLs Only"** on the target documentation to get a full list of all pages within scope
67
+ 2. **Review and trim** the URL list down to the most relevant pages
68
+ 3. **Add the trimmed URLs directly to NotebookLM** as web sources — NotebookLM fetches and indexes them itself, giving you live, citable sources rather than static file uploads
69
+
70
+ This approach gives you fine-grained control over exactly which pages NotebookLM indexes, without wasting your URL quota on irrelevant pages.
71
+
72
+ ---
73
+
74
+ ## Tech Stack
75
+
76
+ | Layer | Technology |
77
+ |---|---|
78
+ | Backend | Python 3.10+, Flask |
79
+ | Scraping | Playwright (headless Chromium) |
80
+ | Parsing | BeautifulSoup4 |
81
+ | Frontend | HTML5, CSS (Glassmorphism), Vanilla JS |
82
+ | CI/CD | GitHub Actions, Black, Flake8, Bandit |
83
+
84
+ ---
85
+
86
+ ## Continuous Integration (CI/CD)
87
+
88
+ DocuLift includes a pre-configured GitHub Actions pipeline (`.github/workflows/ci.yml`) that automatically runs on every push and pull request to the `main` or `master` branches.
89
+
90
+ The pipeline executes the following checks to ensure code quality and security:
91
+
92
+ 1. **Code Formatting (Black)**
93
+ - Automatically checks that all Python files adhere to standard `black` formatting rules.
94
+ 2. **Linting (Flake8)**
95
+ - Scans for syntax errors, undefined names, and unused imports.
96
+ - Enforces a maximum line length and complexity thresholds.
97
+ 3. **Security Scanning (Bandit)**
98
+ - Analyzes Python code for common security vulnerabilities.
99
+ - Ensures safe configurations (e.g., verifying `debug=False` for Flask in production environments).
100
+
101
+ *Note: The pipeline strictly fails if any high-severity security issues are found, preventing insecure code from being merged.*
102
+
103
+ ---
104
+
105
+ ## Installation
106
+
107
+ DocuLift is published on PyPI as `doculift`. We recommend installing it in a virtual environment or using `pipx`.
108
+
109
+ ### Prerequisites
110
+ - Python 3.10 or higher
111
+
112
+ ### Steps
113
+
114
+ 1. **Install the package via pip**
115
+ ```bash
116
+ pip install doculift
117
+ ```
118
+
119
+ 2. **Install Chromium (required for dynamic page scraping)**
120
+ ```bash
121
+ playwright install chromium
122
+ ```
123
+
124
+ 3. **Start the Web UI**
125
+ ```bash
126
+ doculift ui
127
+ ```
128
+ Open `http://127.0.0.1:5001` in your browser.
129
+
130
+ ---
131
+
132
+ ## Usage
133
+
134
+ DocuLift is a hybrid tool. You can run it via a beautiful Web interface, or directly from your terminal.
135
+
136
+ ### 1. Web User Interface
137
+
138
+ Start the local server:
139
+ ```bash
140
+ doculift ui
141
+ # or
142
+ doculift ui --port 5001
143
+ ```
144
+ Then open `http://127.0.0.1:5001` in your browser.
145
+
146
+ 1. **Enter target URLs** — one per line (e.g. `https://docs.docker.com/reference/`)
147
+ 2. **Choose Extract Mode** — *Extract Content* or *Extract URLs Only*
148
+ 3. **Choose Scoping Strategy** — *Section Only* (recommended) or *Entire Domain*
149
+ 4. **Choose Output Format** — Markdown, Plain Text, or CSV (applies to content mode)
150
+ 5. **Set Max Pages per URL** — default 500; each starting URL is crawled independently up to this limit
151
+ 6. **Click "Siphon Content"** and watch the progress bar
152
+ 7. On completion, per-URL stats are shown and files are available for download
153
+
154
+ ### 2. Command Line Interface (CLI)
155
+
156
+ Run extraction directly from your terminal with a beautiful progress bar. Files will be saved into the `./outputs` folder automatically.
157
+
158
+ ```bash
159
+ # See all available commands and options
160
+ doculift --help
161
+
162
+ # See options specific to the scrape command
163
+ doculift scrape --help
164
+
165
+ # Example: Extract full markdown content from a documentation section
166
+ doculift scrape https://docs.docker.com/reference/
167
+
168
+ # Example: Extract only URLs, capped at 1000 pages, from multiple sources
169
+ doculift scrape https://paketo.io/docs/ https://docs.docker.com/ --mode urls --max-pages 1000
170
+ ```
171
+
172
+ ---
173
+
174
+ ## How It Works
175
+
176
+ ```
177
+ User submits URLs + config
178
+
179
+ Background thread spawned (one per job)
180
+
181
+ For each starting URL:
182
+ ├── Determine scope (section boundary or full domain)
183
+ ├── BFS crawl with Playwright (handles JS rendering)
184
+ ├── [Content mode] Clean HTML → Markdown, buffer → split files at 500KB
185
+ └── [URL mode] Collect discovered links → single .txt file
186
+
187
+ Per-URL stats displayed, files available for download
188
+ ```
189
+
190
+ **Key crawl behaviours:**
191
+ - Each starting URL gets an independent BFS with its own visited set — URLs are not cross-contaminated between starting points
192
+ - `max_pages` applies per starting URL, not globally
193
+ - Pages already scraped by an earlier starting URL in the same job are skipped to avoid duplication
194
+ - Fragment URLs (`#anchor`) are normalised and deduplicated
195
+
196
+ ---
197
+
198
+ ## API
199
+
200
+ Trigger jobs programmatically:
201
+
202
+ ```bash
203
+ curl -X POST http://127.0.0.1:5001/scrape \
204
+ -H "Content-Type: application/json" \
205
+ -d '{
206
+ "urls": ["https://docs.docker.com/reference/", "https://paketo.io/docs/"],
207
+ "format": "md",
208
+ "max_pages": 200,
209
+ "scope_type": "section",
210
+ "extract_mode": "content"
211
+ }'
212
+ ```
213
+
214
+ Response:
215
+ ```json
216
+ { "job_id": "abc123" }
217
+ ```
218
+
219
+ Poll for status:
220
+ ```bash
221
+ curl http://127.0.0.1:5001/status/abc123
222
+ ```
223
+
224
+ Response fields: `status`, `progress`, `is_finished`, `files`, `per_url_stats`, `urls_extracted`.
225
+
226
+ Download a file:
227
+ ```
228
+ GET /download/<job_id>/<filename>
229
+ ```
@@ -0,0 +1,202 @@
1
+ # DocuLift
2
+
3
+ **DocuLift** is a web scraping tool that lifts documentation websites into clean, aggregated files optimized for feeding into Large Language Models like Google NotebookLM, Claude, or ChatGPT.
4
+
5
+ It handles dynamic Single Page Applications (SPAs), respects site structure, and produces output in two modes: full content extraction or URL-only extraction.
6
+
7
+ ---
8
+
9
+ ## Features
10
+
11
+ - **Two Extract Modes** — choose between extracting full page content or just collecting URLs (see [When to Use Each Mode](#when-to-use-each-mode))
12
+ - **Dynamic Content Scraping** — uses Playwright (headless Chromium) to render JavaScript-heavy sites (React, Vue, etc.) before extraction
13
+ - **Smart Scoping**:
14
+ - **Section Only** — stays within the folder boundary of the starting URL (e.g. starting at `.../docs/agents/overview` scrapes everything under `.../docs/agents/`)
15
+ - **Entire Domain** — crawls all pages under the target domain
16
+ - **Intelligent Aggregation** — combines multiple pages into single files, auto-splits at ~500KB (NotebookLM's per-file limit), generates meaningful filenames
17
+ - **Multi-URL Support** — submit multiple starting URLs in one job; each is crawled independently and produces its own output file(s)
18
+ - **Per-URL stats** — on completion, the UI shows how many pages or URLs were collected per starting URL
19
+ - **Clean Extraction** — removes navigation, footers, sidebars, ads, and scripts; focuses on main content
20
+
21
+ ---
22
+
23
+ ## When to Use Each Mode
24
+
25
+ ### Extract Content
26
+ Crawls each page and converts its content to Markdown (or text/CSV). Use this when you want to feed documentation directly into an LLM as context.
27
+
28
+ - **Best for**: NotebookLM, Claude Projects, ChatGPT — any tool that accepts uploaded documents
29
+ - **Output**: One or more `.md` files per starting URL, split at ~500KB
30
+ - **Typical workflow**: Extract content → upload files to NotebookLM → ask questions
31
+
32
+ ### Extract URLs Only
33
+ Crawls the site and collects every discovered URL within scope, writing them to a plain `.txt` file — one URL per line, no other content.
34
+
35
+ **Use this when NotebookLM's URL limit is the bottleneck.**
36
+
37
+ NotebookLM supports adding web URLs as sources, but has a cap on how many you can add per notebook. When a documentation section has hundreds of pages, you'll hit that limit quickly. The recommended three-step workflow is:
38
+
39
+ 1. **Run "Extract URLs Only"** on the target documentation to get a full list of all pages within scope
40
+ 2. **Review and trim** the URL list down to the most relevant pages
41
+ 3. **Add the trimmed URLs directly to NotebookLM** as web sources — NotebookLM fetches and indexes them itself, giving you live, citable sources rather than static file uploads
42
+
43
+ This approach gives you fine-grained control over exactly which pages NotebookLM indexes, without wasting your URL quota on irrelevant pages.
44
+
45
+ ---
46
+
47
+ ## Tech Stack
48
+
49
+ | Layer | Technology |
50
+ |---|---|
51
+ | Backend | Python 3.10+, Flask |
52
+ | Scraping | Playwright (headless Chromium) |
53
+ | Parsing | BeautifulSoup4 |
54
+ | Frontend | HTML5, CSS (Glassmorphism), Vanilla JS |
55
+ | CI/CD | GitHub Actions, Black, Flake8, Bandit |
56
+
57
+ ---
58
+
59
+ ## Continuous Integration (CI/CD)
60
+
61
+ DocuLift includes a pre-configured GitHub Actions pipeline (`.github/workflows/ci.yml`) that automatically runs on every push and pull request to the `main` or `master` branches.
62
+
63
+ The pipeline executes the following checks to ensure code quality and security:
64
+
65
+ 1. **Code Formatting (Black)**
66
+ - Automatically checks that all Python files adhere to standard `black` formatting rules.
67
+ 2. **Linting (Flake8)**
68
+ - Scans for syntax errors, undefined names, and unused imports.
69
+ - Enforces a maximum line length and complexity thresholds.
70
+ 3. **Security Scanning (Bandit)**
71
+ - Analyzes Python code for common security vulnerabilities.
72
+ - Ensures safe configurations (e.g., verifying `debug=False` for Flask in production environments).
73
+
74
+ *Note: The pipeline strictly fails if any high-severity security issues are found, preventing insecure code from being merged.*
75
+
76
+ ---
77
+
78
+ ## Installation
79
+
80
+ DocuLift is published on PyPI as `doculift`. We recommend installing it in a virtual environment or using `pipx`.
81
+
82
+ ### Prerequisites
83
+ - Python 3.10 or higher
84
+
85
+ ### Steps
86
+
87
+ 1. **Install the package via pip**
88
+ ```bash
89
+ pip install doculift
90
+ ```
91
+
92
+ 2. **Install Chromium (required for dynamic page scraping)**
93
+ ```bash
94
+ playwright install chromium
95
+ ```
96
+
97
+ 3. **Start the Web UI**
98
+ ```bash
99
+ doculift ui
100
+ ```
101
+ Open `http://127.0.0.1:5001` in your browser.
102
+
103
+ ---
104
+
105
+ ## Usage
106
+
107
+ DocuLift is a hybrid tool. You can run it via a beautiful Web interface, or directly from your terminal.
108
+
109
+ ### 1. Web User Interface
110
+
111
+ Start the local server:
112
+ ```bash
113
+ doculift ui
114
+ # or
115
+ doculift ui --port 5001
116
+ ```
117
+ Then open `http://127.0.0.1:5001` in your browser.
118
+
119
+ 1. **Enter target URLs** — one per line (e.g. `https://docs.docker.com/reference/`)
120
+ 2. **Choose Extract Mode** — *Extract Content* or *Extract URLs Only*
121
+ 3. **Choose Scoping Strategy** — *Section Only* (recommended) or *Entire Domain*
122
+ 4. **Choose Output Format** — Markdown, Plain Text, or CSV (applies to content mode)
123
+ 5. **Set Max Pages per URL** — default 500; each starting URL is crawled independently up to this limit
124
+ 6. **Click "Siphon Content"** and watch the progress bar
125
+ 7. On completion, per-URL stats are shown and files are available for download
126
+
127
+ ### 2. Command Line Interface (CLI)
128
+
129
+ Run extraction directly from your terminal with a beautiful progress bar. Files will be saved into the `./outputs` folder automatically.
130
+
131
+ ```bash
132
+ # See all available commands and options
133
+ doculift --help
134
+
135
+ # See options specific to the scrape command
136
+ doculift scrape --help
137
+
138
+ # Example: Extract full markdown content from a documentation section
139
+ doculift scrape https://docs.docker.com/reference/
140
+
141
+ # Example: Extract only URLs, capped at 1000 pages, from multiple sources
142
+ doculift scrape https://paketo.io/docs/ https://docs.docker.com/ --mode urls --max-pages 1000
143
+ ```
144
+
145
+ ---
146
+
147
+ ## How It Works
148
+
149
+ ```
150
+ User submits URLs + config
151
+
152
+ Background thread spawned (one per job)
153
+
154
+ For each starting URL:
155
+ ├── Determine scope (section boundary or full domain)
156
+ ├── BFS crawl with Playwright (handles JS rendering)
157
+ ├── [Content mode] Clean HTML → Markdown, buffer → split files at 500KB
158
+ └── [URL mode] Collect discovered links → single .txt file
159
+
160
+ Per-URL stats displayed, files available for download
161
+ ```
162
+
163
+ **Key crawl behaviours:**
164
+ - Each starting URL gets an independent BFS with its own visited set — URLs are not cross-contaminated between starting points
165
+ - `max_pages` applies per starting URL, not globally
166
+ - Pages already scraped by an earlier starting URL in the same job are skipped to avoid duplication
167
+ - Fragment URLs (`#anchor`) are normalised and deduplicated
168
+
169
+ ---
170
+
171
+ ## API
172
+
173
+ Trigger jobs programmatically:
174
+
175
+ ```bash
176
+ curl -X POST http://127.0.0.1:5001/scrape \
177
+ -H "Content-Type: application/json" \
178
+ -d '{
179
+ "urls": ["https://docs.docker.com/reference/", "https://paketo.io/docs/"],
180
+ "format": "md",
181
+ "max_pages": 200,
182
+ "scope_type": "section",
183
+ "extract_mode": "content"
184
+ }'
185
+ ```
186
+
187
+ Response:
188
+ ```json
189
+ { "job_id": "abc123" }
190
+ ```
191
+
192
+ Poll for status:
193
+ ```bash
194
+ curl http://127.0.0.1:5001/status/abc123
195
+ ```
196
+
197
+ Response fields: `status`, `progress`, `is_finished`, `files`, `per_url_stats`, `urls_extracted`.
198
+
199
+ Download a file:
200
+ ```
201
+ GET /download/<job_id>/<filename>
202
+ ```
@@ -0,0 +1,48 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "doculift"
7
+ version = "0.1.0"
8
+ description = "A powerful CLI & web scraper that lifts documentation for Large Language Models."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ { name = "M.J. Shetty" }
14
+ ]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 3",
17
+ "License :: OSI Approved :: MIT License",
18
+ "Operating System :: OS Independent",
19
+ "Intended Audience :: Developers",
20
+ "Topic :: Utilities",
21
+ ]
22
+ dependencies = [
23
+ "flask>=3.0.0",
24
+ "requests",
25
+ "beautifulsoup4",
26
+ "playwright",
27
+ "click",
28
+ "rich"
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ dev = [
33
+ "black",
34
+ "flake8",
35
+ "bandit",
36
+ "build",
37
+ "twine"
38
+ ]
39
+
40
+ [project.urls]
41
+ "Homepage" = "https://github.com/mjshetty/doculift"
42
+
43
+ [project.scripts]
44
+ doculift = "doculift.cli:cli"
45
+
46
+ [tool.setuptools.packages.find]
47
+ where = ["src"]
48
+ include = ["doculift*"]
@@ -0,0 +1,9 @@
1
+ flask
2
+ requests
3
+ beautifulsoup4
4
+ playwright
5
+ flake8
6
+ black
7
+ bandit
8
+ click
9
+ rich
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes
@@ -0,0 +1,4 @@
1
"""Package entry point so the tool can be run as ``python -m doculift``."""

from .cli import cli

if __name__ == "__main__":
    cli()
@@ -0,0 +1,97 @@
1
+ from flask import Flask, render_template, request, jsonify, send_from_directory
2
+ import threading
3
+ import uuid
4
+ import os
5
+ from .scraper import DocuLiftScraper
6
+
7
+ app = Flask(__name__)
8
+
9
+ # In-memory storage for jobs
10
+ # Production should use Redis/Celery, but for this scale a global dict is fine.
11
+ jobs = {}
12
+
13
+ OUTPUT_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "outputs")
14
+ if not os.path.exists(OUTPUT_DIR):
15
+ os.makedirs(OUTPUT_DIR)
16
+
17
+
18
@app.route("/")
def index():
    """Serve the single-page web UI."""
    template_name = "index.html"
    return render_template(template_name)
21
+
22
+
23
@app.route("/scrape", methods=["POST"])
def start_scrape():
    """Start a scraping job from a JSON payload and return its job id.

    Expected JSON body:
        urls (list[str]): starting URLs — required, non-empty.
        format (str): output format, default "md".
        max_pages (int): per-URL crawl cap, default 500.
        scope_type (str): crawl scope, default "section".
        extract_mode (str): "content" or "urls", default "content".

    Returns:
        JSON ``{"job_id": ...}`` on success; ``{"error": ...}`` with 400
        when the payload is missing URLs or has a non-integer max_pages.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body, so bad requests get a clean 400 below rather than an
    # unhandled server error.
    data = request.get_json(silent=True) or {}
    urls = data.get("urls", [])
    output_format = data.get("format", "md")
    scope_type = data.get("scope_type", "section")
    extract_mode = data.get("extract_mode", "content")

    # A non-numeric max_pages previously crashed with an uncaught ValueError.
    try:
        max_pages = int(data.get("max_pages", 500))
    except (TypeError, ValueError):
        return jsonify({"error": "max_pages must be an integer"}), 400

    if not urls:
        return jsonify({"error": "No URLs provided"}), 400

    job_id = str(uuid.uuid4())
    scraper = DocuLiftScraper(
        urls,
        output_format=output_format,
        max_pages=max_pages,
        scope_type=scope_type,
        extract_mode=extract_mode,
    )

    jobs[job_id] = {"scraper": scraper, "status": "pending", "progress": 0, "files": []}

    # Daemon thread: a long-running crawl must not block interpreter shutdown.
    thread = threading.Thread(target=run_scraper_task, args=(job_id, scraper))
    thread.daemon = True
    thread.start()

    return jsonify({"job_id": job_id})
52
+
53
+
54
def run_scraper_task(job_id, scraper):
    """Background worker: run *scraper* and record the outcome on its job.

    Runs in a daemon thread, one per job. Output files land in
    ``OUTPUT_DIR/<job_id>``; on success the job entry gets the file list,
    a "completed" status and 100% progress, on failure the error text is
    stored in ``status`` (surfaced to the client via /status).
    """
    import logging  # local import: only needed on this background path

    job = jobs[job_id]
    try:
        job["status"] = "running"
        job_dir = os.path.join(OUTPUT_DIR, job_id)
        scraper.run(job_dir)

        # Sorted for a deterministic listing — os.listdir order is
        # filesystem-dependent.
        job["files"] = sorted(os.listdir(job_dir))
        job["status"] = "completed"
        job["progress"] = 100
    except Exception as e:
        # Broad catch is intentional at a thread boundary: an escaped
        # exception would kill the worker silently. Log the traceback so
        # failures are diagnosable from the server log, not just the message.
        logging.exception("Scrape job %s failed", job_id)
        job["status"] = f"error: {str(e)}"
67
+
68
+
69
@app.route("/status/<job_id>")
def get_status(job_id):
    """Return progress/status JSON for a job, or 404 if the id is unknown."""
    job = jobs.get(job_id)
    if not job:
        return jsonify({"error": "Job not found"}), 404

    status = job["status"]
    # Stored statuses are "pending", "running", "completed", or "error: <msg>".
    # The previous check `status in ["completed", "error"]` contained a dead
    # alternative — the exact string "error" is never stored — so the finished
    # test is spelled out explicitly here.
    is_finished = status == "completed" or status.startswith("error")

    return jsonify(
        {
            "status": job["scraper"].status,
            "progress": job["scraper"].progress,
            "is_finished": is_finished,
            "files": job["files"],
            "job_id": job_id,
            "urls_extracted": job["scraper"].urls_extracted,
            "per_url_stats": job["scraper"].per_url_stats,
        }
    )
87
+
88
+
89
@app.route("/download/<job_id>/<filename>")
def download_file(job_id, filename):
    """Serve one output file from a job's directory.

    ``job_id`` is validated against the in-memory job table before being
    used as a path component: ``send_from_directory`` only sanitises
    *filename*, so an unchecked id such as ".." could escape OUTPUT_DIR.
    """
    if job_id not in jobs:
        return jsonify({"error": "Job not found"}), 404
    return send_from_directory(os.path.join(OUTPUT_DIR, job_id), filename)
92
+
93
+
94
if __name__ == "__main__":
    # Keep debug mode off: the Werkzeug debugger permits arbitrary code
    # execution if ever exposed, and Bandit (B201) flags it in CI.
    app.run(debug=False, port=5001)