achem 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
achem-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 ACHEM Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
achem-1.0.0/PKG-INFO ADDED
@@ -0,0 +1,253 @@
1
+ Metadata-Version: 2.4
2
+ Name: achem
3
+ Version: 1.0.0
4
+ Summary: Deep Web Research Tool - Aggregates 30+ sources, scrapes content, generates AI summaries
5
+ Author: ACHEM Contributors
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/achem/achem
8
+ Project-URL: Documentation, https://github.com/achem/achem#readme
9
+ Project-URL: Repository, https://github.com/achem/achem
10
+ Project-URL: Issues, https://github.com/achem/achem/issues
11
+ Keywords: research,deep-web,web-scraping,summarization,ai,cli,tool
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Intended Audience :: Education
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: Operating System :: OS Independent
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Internet :: WWW/HTTP :: Indexing/Search
24
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
+ Classifier: Topic :: Text Processing :: General
26
+ Requires-Python: >=3.10
27
+ Description-Content-Type: text/markdown
28
+ License-File: LICENSE
29
+ Requires-Dist: wikipedia-api>=0.5.4
30
+ Requires-Dist: rich>=13.0.0
31
+ Requires-Dist: psutil>=5.9.0
32
+ Requires-Dist: prompt_toolkit>=3.0.0
33
+ Requires-Dist: pyfiglet>=0.8.0
34
+ Requires-Dist: openai>=1.0.0
35
+ Requires-Dist: ddgs>=3.0.0
36
+ Requires-Dist: beautifulsoup4>=4.12.0
37
+ Requires-Dist: requests>=2.31.0
38
+ Provides-Extra: arabic
39
+ Requires-Dist: arabic-reshaper>=3.0.0; extra == "arabic"
40
+ Requires-Dist: python-bidi>=0.14.0; extra == "arabic"
41
+ Provides-Extra: dev
42
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
43
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
44
+ Dynamic: license-file
45
+
46
+ # ACHEM - Deep Web Research Tool
47
+
48
+ ![ACHEM Banner](https://img.shields.io/badge/ACHEM-v1.0.0-blue?style=for-the-badge)
49
+
50
+ > **ACHEM** (Arabic: آشم) is a powerful deep web research tool that aggregates information from 30+ sources, scrapes full content from top results, and generates concise summaries using AI.
51
+
52
+ ## Features
53
+
54
+ - **Deep Web Research**: Gathers results from 30+ sources via DuckDuckGo
55
+ - **Web Scraping**: Extracts full content from top 3 most relevant links
56
+ - **Two-Pass Search**: Prioritizes technical content (StackOverflow, GitHub, forums)
57
+ - **AI Summarization**: Uses Hugging Face Inference Providers (free tier)
58
+ - **Syntax Highlighting**: Color-coded output for easy scanning
59
+ - **SQLite Cache**: Instant recall for repeated searches
60
+ - **Export**: Save summaries to Markdown files
61
+ - **Multi-language**: Supports English, French, and Arabic
62
+
63
+ ## Screenshots
64
+
65
+ ```
66
+ ╔══════════════════════════════════════════════════════════════════╗
67
+ ║ ACHEM - Deep Web Research ║
68
+ ╚══════════════════════════════════════════════════════════════════╝
69
+
70
+ 🔍 Deep Research: how to learn python
71
+ ==================================================
72
+ PASS 1: Gathering 30 sources from DuckDuckGo...
73
+ ✓ Found 30 sources
74
+ PASS 2: Scraped full content from top 3 links
75
+ → Analyzing 35 total sources...
76
+ → Generating deep summary...
77
+
78
+ ╭──────────────────────────────────────────────────────────────────╮
79
+ │ UNIFIED RESEARCH SUMMARY │
80
+ ├──────────────────────────────────────────────────────────────────┤
81
+ │ 1. Start with the official Python tutorial: │
82
+ │ - Visit docs.python.org/3/tutorial │
83
+ │ │
84
+ │ 2. Use free online tutorials: │
85
+ │ - LearnPython.org, pythonbasics.org │
86
+ ╰──────────────────────────────────────────────────────────────────╯
87
+ ```
88
+
89
+ ## Installation
90
+
91
+ ### Prerequisites
92
+
93
+ - Python 3.10 or higher
94
+ - pip package manager
95
+
96
+ ### Quick Install (PyPI)
97
+
98
+ ```bash
99
+ pip install achem
100
+ ```
101
+
102
+ ### Or Install from Source
103
+
104
+ 1. **Clone the repository**
105
+ ```bash
106
+ git clone https://github.com/achem/achem.git
107
+ cd achem
108
+ ```
109
+
110
+ 2. **Install in editable mode**
111
+ ```bash
112
+ pip install -e .
113
+ ```
114
+
115
+ 3. **Configure API keys**
116
+ ```bash
117
+ cp .env.example .env
118
+ ```
119
+
120
+ Then edit `.env` and add your Hugging Face API token:
121
+ ```env
122
+ HF_API_KEY=hf_your_token_here
123
+ HF_MODEL=Qwen/Qwen2.5-7B-Instruct
124
+ ```
125
+
126
+ ### Getting a Hugging Face API Token
127
+
128
+ 1. Go to [Hugging Face](https://huggingface.co/)
129
+ 2. Create an account (free)
130
+ 3. Go to Settings → Access Tokens
131
+ 4. Create a new token with "Read" permissions
132
+ 5. Copy the token to your `.env` file
133
+
134
+ ## Usage
135
+
136
+ ### Interactive Mode
137
+
138
+ ```bash
139
+ python src/main.py
140
+ ```
141
+
142
+ ### Command Line Mode
143
+
144
+ ```bash
145
+ python src/main.py "your search query"
146
+ ```
147
+
148
+ ### Options
149
+
150
+ | Option | Description | Default |
151
+ |--------|-------------|---------|
152
+ | `-l, --limit` | Wikipedia results per query | 10 |
153
+ | `--lang` | Language (en/fr/ar/auto) | auto |
154
+ | `--ddg-limit` | DuckDuckGo results | 30 |
155
+ | `--min-relevance` | Minimum relevance % | 0 |
156
+ | `--no-cache` | Skip cache | False |
157
+ | `--no-wikipedia` | Skip Wikipedia | False |
158
+ | `--clear-cache` | Clear SQLite cache | False |
159
+
160
+ ### Commands (Interactive Mode)
161
+
162
+ | Command | Description |
163
+ |---------|-------------|
164
+ | `clear` / `cls` | Clear screen |
165
+ | `export` / `save` | Export last summary |
166
+ | `help` / `?` | Show help |
167
+ | `version` / `v` | Show version |
168
+ | `exit` / `quit` / `q` | Exit program |
169
+
170
+ ## Project Structure
171
+
172
+ ```
173
+ ACHEM/
174
+ ├── src/
175
+ │ └── achem/ # Main package
176
+ │ ├── __init__.py
177
+ │ ├── main.py # Entry point
178
+ │ ├── commands.py # Command handler
179
+ │ ├── config_manager.py # Config loader
180
+ │ ├── duckduckgo_client.py # DDG search
181
+ │ ├── export_manager.py # Export to Documents/ACHEM/
182
+ │ ├── huggingface_summarizer.py # AI summarization
183
+ │ ├── output_formatter.py # Terminal UI
184
+ │ ├── search_router.py # Source priority
185
+ │ ├── sqlite_cache.py # SQLite cache
186
+ │ ├── spell_checker.py # Typo correction
187
+ │ ├── text_analyzer.py # TF-IDF analysis
188
+ │ ├── user_input.py # Input handler
189
+ │ ├── web_scraper.py # BeautifulSoup scraper
190
+ │ └── wikipedia_client.py # Wikipedia API
191
+ ├── .env.example # Config template
192
+ ├── .gitignore
193
+ ├── LICENSE
194
+ ├── README.md
195
+ └── pyproject.toml # Package metadata
196
+ ```
197
+
198
+ ## How It Works
199
+
200
+ ### Two-Pass Search System
201
+
202
+ ```
203
+ ┌─────────────────────────────────────────────────────┐
204
+ │ PASS 1: DuckDuckGo Search (30 results) │
205
+ │ • Prioritizes technical sites │
206
+ │ • Filters out cookie/login/consent pages │
207
+ │ • Ranks by domain authority │
208
+ ├─────────────────────────────────────────────────────┤
209
+ │ PASS 2: Web Scraping (Top 3) │
210
+ │ • BeautifulSoup extracts full article text │
211
+ │ • Removes navigation/footer/scripts │
212
+ │ • Combines up to 10,000 chars per article │
213
+ ├─────────────────────────────────────────────────────┤
214
+ │ PASS 3: AI Summarization │
215
+ │ • Neutral technical prompt │
216
+ │ • No ethical warnings or opinions │
217
+ │ • 500-4000 character output │
218
+ │ • Syntax highlighting for steps/commands │
219
+ └─────────────────────────────────────────────────────┘
220
+ ```
221
+
222
+ ### Source Priority
223
+
224
+ 1. **DuckDuckGo** (Primary) - Real-time web results
225
+ 2. **Wikipedia** (Secondary) - Background concepts only
226
+ 3. **Web Scraping** - Full content from top 3
227
+
228
+ ## Export Location
229
+
230
+ Summaries are saved to:
231
+ - **Linux/macOS**: `~/Documents/ACHEM/`
232
+ - **Windows**: `C:\Users\<username>\Documents\ACHEM\`
233
+
234
+ ## Disclaimer
235
+
236
+ **ACHEM is for educational and research purposes only.**
237
+
238
+ The tool aggregates publicly available information from the web. Any actions taken based on the information provided are the sole responsibility of the user. The developer is not responsible for any misuse of this tool.
239
+
240
+ ## License
241
+
242
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
243
+
244
+ ## Contributing
245
+
246
+ Contributions are welcome! Please feel free to submit issues and pull requests.
247
+
248
+ ## Acknowledgments
249
+
250
+ - [Hugging Face](https://huggingface.co/) - Free inference API
251
+ - [DuckDuckGo](https://duckduckgo.com/) - Privacy-focused search
252
+ - [Wikipedia](https://www.wikipedia.org/) - Free encyclopedia
253
+ - [Qwen](https://huggingface.co/Qwen) - Open source AI models
achem-1.0.0/README.md ADDED
@@ -0,0 +1,208 @@
1
+ # ACHEM - Deep Web Research Tool
2
+
3
+ ![ACHEM Banner](https://img.shields.io/badge/ACHEM-v1.0.0-blue?style=for-the-badge)
4
+
5
+ > **ACHEM** (Arabic: آشم) is a powerful deep web research tool that aggregates information from 30+ sources, scrapes full content from top results, and generates concise summaries using AI.
6
+
7
+ ## Features
8
+
9
+ - **Deep Web Research**: Gathers results from 30+ sources via DuckDuckGo
10
+ - **Web Scraping**: Extracts full content from top 3 most relevant links
11
+ - **Two-Pass Search**: Prioritizes technical content (StackOverflow, GitHub, forums)
12
+ - **AI Summarization**: Uses Hugging Face Inference Providers (free tier)
13
+ - **Syntax Highlighting**: Color-coded output for easy scanning
14
+ - **SQLite Cache**: Instant recall for repeated searches
15
+ - **Export**: Save summaries to Markdown files
16
+ - **Multi-language**: Supports English, French, and Arabic
17
+
18
+ ## Screenshots
19
+
20
+ ```
21
+ ╔══════════════════════════════════════════════════════════════════╗
22
+ ║ ACHEM - Deep Web Research ║
23
+ ╚══════════════════════════════════════════════════════════════════╝
24
+
25
+ 🔍 Deep Research: how to learn python
26
+ ==================================================
27
+ PASS 1: Gathering 30 sources from DuckDuckGo...
28
+ ✓ Found 30 sources
29
+ PASS 2: Scraped full content from top 3 links
30
+ → Analyzing 35 total sources...
31
+ → Generating deep summary...
32
+
33
+ ╭──────────────────────────────────────────────────────────────────╮
34
+ │ UNIFIED RESEARCH SUMMARY │
35
+ ├──────────────────────────────────────────────────────────────────┤
36
+ │ 1. Start with the official Python tutorial: │
37
+ │ - Visit docs.python.org/3/tutorial │
38
+ │ │
39
+ │ 2. Use free online tutorials: │
40
+ │ - LearnPython.org, pythonbasics.org │
41
+ ╰──────────────────────────────────────────────────────────────────╯
42
+ ```
43
+
44
+ ## Installation
45
+
46
+ ### Prerequisites
47
+
48
+ - Python 3.10 or higher
49
+ - pip package manager
50
+
51
+ ### Quick Install (PyPI)
52
+
53
+ ```bash
54
+ pip install achem
55
+ ```
56
+
57
+ ### Or Install from Source
58
+
59
+ 1. **Clone the repository**
60
+ ```bash
61
+ git clone https://github.com/achem/achem.git
62
+ cd achem
63
+ ```
64
+
65
+ 2. **Install in editable mode**
66
+ ```bash
67
+ pip install -e .
68
+ ```
69
+
70
+ 3. **Configure API keys**
71
+ ```bash
72
+ cp .env.example .env
73
+ ```
74
+
75
+ Then edit `.env` and add your Hugging Face API token:
76
+ ```env
77
+ HF_API_KEY=hf_your_token_here
78
+ HF_MODEL=Qwen/Qwen2.5-7B-Instruct
79
+ ```
80
+
81
+ ### Getting a Hugging Face API Token
82
+
83
+ 1. Go to [Hugging Face](https://huggingface.co/)
84
+ 2. Create an account (free)
85
+ 3. Go to Settings → Access Tokens
86
+ 4. Create a new token with "Read" permissions
87
+ 5. Copy the token to your `.env` file
88
+
89
+ ## Usage
90
+
91
+ ### Interactive Mode
92
+
93
+ ```bash
94
+ python src/main.py
95
+ ```
96
+
97
+ ### Command Line Mode
98
+
99
+ ```bash
100
+ python src/main.py "your search query"
101
+ ```
102
+
103
+ ### Options
104
+
105
+ | Option | Description | Default |
106
+ |--------|-------------|---------|
107
+ | `-l, --limit` | Wikipedia results per query | 10 |
108
+ | `--lang` | Language (en/fr/ar/auto) | auto |
109
+ | `--ddg-limit` | DuckDuckGo results | 30 |
110
+ | `--min-relevance` | Minimum relevance % | 0 |
111
+ | `--no-cache` | Skip cache | False |
112
+ | `--no-wikipedia` | Skip Wikipedia | False |
113
+ | `--clear-cache` | Clear SQLite cache | False |
114
+
115
+ ### Commands (Interactive Mode)
116
+
117
+ | Command | Description |
118
+ |---------|-------------|
119
+ | `clear` / `cls` | Clear screen |
120
+ | `export` / `save` | Export last summary |
121
+ | `help` / `?` | Show help |
122
+ | `version` / `v` | Show version |
123
+ | `exit` / `quit` / `q` | Exit program |
124
+
125
+ ## Project Structure
126
+
127
+ ```
128
+ ACHEM/
129
+ ├── src/
130
+ │ └── achem/ # Main package
131
+ │ ├── __init__.py
132
+ │ ├── main.py # Entry point
133
+ │ ├── commands.py # Command handler
134
+ │ ├── config_manager.py # Config loader
135
+ │ ├── duckduckgo_client.py # DDG search
136
+ │ ├── export_manager.py # Export to Documents/ACHEM/
137
+ │ ├── huggingface_summarizer.py # AI summarization
138
+ │ ├── output_formatter.py # Terminal UI
139
+ │ ├── search_router.py # Source priority
140
+ │ ├── sqlite_cache.py # SQLite cache
141
+ │ ├── spell_checker.py # Typo correction
142
+ │ ├── text_analyzer.py # TF-IDF analysis
143
+ │ ├── user_input.py # Input handler
144
+ │ ├── web_scraper.py # BeautifulSoup scraper
145
+ │ └── wikipedia_client.py # Wikipedia API
146
+ ├── .env.example # Config template
147
+ ├── .gitignore
148
+ ├── LICENSE
149
+ ├── README.md
150
+ └── pyproject.toml # Package metadata
151
+ ```
152
+
153
+ ## How It Works
154
+
155
+ ### Two-Pass Search System
156
+
157
+ ```
158
+ ┌─────────────────────────────────────────────────────┐
159
+ │ PASS 1: DuckDuckGo Search (30 results) │
160
+ │ • Prioritizes technical sites │
161
+ │ • Filters out cookie/login/consent pages │
162
+ │ • Ranks by domain authority │
163
+ ├─────────────────────────────────────────────────────┤
164
+ │ PASS 2: Web Scraping (Top 3) │
165
+ │ • BeautifulSoup extracts full article text │
166
+ │ • Removes navigation/footer/scripts │
167
+ │ • Combines up to 10,000 chars per article │
168
+ ├─────────────────────────────────────────────────────┤
169
+ │ PASS 3: AI Summarization │
170
+ │ • Neutral technical prompt │
171
+ │ • No ethical warnings or opinions │
172
+ │ • 500-4000 character output │
173
+ │ • Syntax highlighting for steps/commands │
174
+ └─────────────────────────────────────────────────────┘
175
+ ```
176
+
177
+ ### Source Priority
178
+
179
+ 1. **DuckDuckGo** (Primary) - Real-time web results
180
+ 2. **Wikipedia** (Secondary) - Background concepts only
181
+ 3. **Web Scraping** - Full content from top 3
182
+
183
+ ## Export Location
184
+
185
+ Summaries are saved to:
186
+ - **Linux/macOS**: `~/Documents/ACHEM/`
187
+ - **Windows**: `C:\Users\<username>\Documents\ACHEM\`
188
+
189
+ ## Disclaimer
190
+
191
+ **ACHEM is for educational and research purposes only.**
192
+
193
+ The tool aggregates publicly available information from the web. Any actions taken based on the information provided are the sole responsibility of the user. The developer is not responsible for any misuse of this tool.
194
+
195
+ ## License
196
+
197
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
198
+
199
+ ## Contributing
200
+
201
+ Contributions are welcome! Please feel free to submit issues and pull requests.
202
+
203
+ ## Acknowledgments
204
+
205
+ - [Hugging Face](https://huggingface.co/) - Free inference API
206
+ - [DuckDuckGo](https://duckduckgo.com/) - Privacy-focused search
207
+ - [Wikipedia](https://www.wikipedia.org/) - Free encyclopedia
208
+ - [Qwen](https://huggingface.co/Qwen) - Open source AI models
@@ -0,0 +1,75 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "achem"
7
+ version = "1.0.0"
8
+ description = "Deep Web Research Tool - Aggregates 30+ sources, scrapes content, generates AI summaries"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ authors = [
12
+ {name = "ACHEM Contributors"}
13
+ ]
14
+ keywords = [
15
+ "research",
16
+ "deep-web",
17
+ "web-scraping",
18
+ "summarization",
19
+ "ai",
20
+ "cli",
21
+ "tool"
22
+ ]
23
+ classifiers = [
24
+ "Development Status :: 5 - Production/Stable",
25
+ "Environment :: Console",
26
+ "Intended Audience :: Developers",
27
+ "Intended Audience :: Education",
28
+ "Intended Audience :: Science/Research",
29
+ "Operating System :: OS Independent",
30
+ "Programming Language :: Python :: 3",
31
+ "Programming Language :: Python :: 3.10",
32
+ "Programming Language :: Python :: 3.11",
33
+ "Programming Language :: Python :: 3.12",
34
+ "Programming Language :: Python :: 3.13",
35
+ "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
36
+ "Topic :: Scientific/Engineering :: Information Analysis",
37
+ "Topic :: Text Processing :: General",
38
+ ]
39
+ requires-python = ">=3.10"
40
+ dependencies = [
41
+ "wikipedia-api>=0.5.4",
42
+ "rich>=13.0.0",
43
+ "psutil>=5.9.0",
44
+ "prompt_toolkit>=3.0.0",
45
+ "pyfiglet>=0.8.0",
46
+ "openai>=1.0.0",
47
+ "ddgs>=3.0.0",
48
+ "beautifulsoup4>=4.12.0",
49
+ "requests>=2.31.0",
50
+ ]
51
+
52
+ [project.optional-dependencies]
53
+ arabic = [
54
+ "arabic-reshaper>=3.0.0",
55
+ "python-bidi>=0.14.0",
56
+ ]
57
+ dev = [
58
+ "pytest>=7.0.0",
59
+ "ruff>=0.1.0",
60
+ ]
61
+
62
+ [project.scripts]
63
+ achem = "achem.main:main"
64
+
65
+ [project.urls]
66
+ Homepage = "https://github.com/achem/achem"
67
+ Documentation = "https://github.com/achem/achem#readme"
68
+ Repository = "https://github.com/achem/achem"
69
+ Issues = "https://github.com/achem/achem/issues"
70
+
71
+ [tool.setuptools.packages.find]
72
+ where = ["src"]
73
+
74
+ [tool.setuptools.package-data]
75
+ achem = ["py.typed"]
achem-1.0.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,9 @@
1
"""ACHEM - Deep Web Research Tool.

Command-line tool that aggregates results from 30+ web sources,
scrapes the full content of the top hits, and produces AI-generated
summaries.
"""

__version__ = "1.0.0"
__license__ = "MIT"
__author__ = "ACHEM Contributors"
@@ -0,0 +1,6 @@
1
"""Package entry point so the tool can be launched as ``python -m achem``."""

from .main import main

# Delegate straight to the CLI main() when executed as a module.
if __name__ == "__main__":
    main()
@@ -0,0 +1,98 @@
1
+ import os
2
+ import json
3
+ import time
4
+ import hashlib
5
+ from pathlib import Path
6
+ from typing import Optional
7
+
8
+
9
class CacheManager:
    """File-based JSON cache with per-entry TTL expiry.

    Each entry is stored as one JSON file under ``cache_dir`` with the
    shape ``{"query": ..., "timestamp": ..., "data": ...}``; the file
    name is the MD5 hex digest of the normalized (lowercased, stripped)
    query, so lookups are case- and whitespace-insensitive.
    """

    def __init__(self, cache_dir: Optional[str] = None, ttl_seconds: int = 86400):
        """Create the cache directory (and parents) if it does not exist.

        Args:
            cache_dir: Directory that holds the cache files. Defaults to
                ``~/.wiki-summarizer/cache``.
                NOTE(review): the default directory name is
                ".wiki-summarizer", not the package name "achem" —
                confirm this is intentional.
            ttl_seconds: Entry lifetime in seconds; entries older than
                this are treated as missing and removed on access.
                Defaults to one day.
        """
        if cache_dir is None:
            cache_dir = str(Path.home() / ".wiki-summarizer" / "cache")

        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.ttl_seconds = ttl_seconds

    def _get_cache_key(self, query: str) -> str:
        """Return the MD5 hex digest of the normalized query."""
        normalized = query.lower().strip()
        # MD5 is used purely as a stable filename hash, not for security.
        return hashlib.md5(normalized.encode(), usedforsecurity=False).hexdigest()

    def _get_cache_path(self, cache_key: str) -> Path:
        """Map a cache key to its JSON file path inside the cache dir."""
        return self.cache_dir / f"{cache_key}.json"

    def get(self, query: str) -> Optional[dict]:
        """Return cached data for *query*, or None if absent, expired, or corrupt.

        An expired entry is deleted as a side effect of the lookup.
        """
        cache_path = self._get_cache_path(self._get_cache_key(query))

        if not cache_path.exists():
            return None

        try:
            with open(cache_path, "r", encoding="utf-8") as f:
                cache_data = json.load(f)

            if time.time() - cache_data.get("timestamp", 0) > self.ttl_seconds:
                # Stale entry: drop the file; tolerate a concurrent delete.
                cache_path.unlink(missing_ok=True)
                return None

            return cache_data.get("data")
        except (json.JSONDecodeError, OSError):
            # A corrupt or unreadable entry behaves like a cache miss.
            return None

    def set(self, query: str, data: dict) -> None:
        """Store *data* for *query*; write failures are silently ignored."""
        cache_path = self._get_cache_path(self._get_cache_key(query))

        cache_data = {"query": query, "timestamp": time.time(), "data": data}

        try:
            with open(cache_path, "w", encoding="utf-8") as f:
                json.dump(cache_data, f, ensure_ascii=False, indent=2)
        except OSError:
            # Best-effort cache: failing to persist is not an error.
            pass

    def invalidate(self, query: Optional[str] = None) -> None:
        """Delete the entry for *query*, or every entry when query is falsy."""
        if query:
            # missing_ok avoids a race with concurrent expiry/removal.
            self._get_cache_path(self._get_cache_key(query)).unlink(missing_ok=True)
        else:
            for cache_file in self.cache_dir.glob("*.json"):
                cache_file.unlink(missing_ok=True)

    def get_stats(self) -> dict:
        """Return cache statistics.

        Returns:
            dict with keys ``total_files`` (count of ``*.json`` files),
            ``total_size_mb`` (aggregate size, rounded to 2 decimals) and
            ``expired_files`` (entries older than the TTL).
        """
        total_size = 0
        file_count = 0
        expired_count = 0
        now = time.time()

        for cache_file in self.cache_dir.glob("*.json"):
            file_count += 1
            total_size += cache_file.stat().st_size

            try:
                with open(cache_file, "r", encoding="utf-8") as f:
                    cached_time = json.load(f).get("timestamp", 0)
                if now - cached_time > self.ttl_seconds:
                    expired_count += 1
            except (json.JSONDecodeError, OSError):
                # Unreadable files still count toward totals, not expiry.
                pass

        return {
            "total_files": file_count,
            "total_size_mb": round(total_size / (1024 * 1024), 2),
            "expired_files": expired_count,
        }