drupal-news 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- drupal_news/__init__.py +1 -0
- drupal_news/ai_summarizer.py +337 -0
- drupal_news/cache_manager.py +174 -0
- drupal_news/cli.py +41 -0
- drupal_news/compile_scss.py +161 -0
- drupal_news/data_cleaner.py +167 -0
- drupal_news/email_sender.py +265 -0
- drupal_news/index.py +426 -0
- drupal_news/markdown_converter.py +187 -0
- drupal_news/metrics_collector.py +119 -0
- drupal_news/pdf_generator.py +259 -0
- drupal_news/pipeline_integrity.py +152 -0
- drupal_news/process_logger.py +82 -0
- drupal_news/rss_reader.py +134 -0
- drupal_news/scheduler.py +100 -0
- drupal_news/utils/__init__.py +1 -0
- drupal_news/utils/dedupe.py +70 -0
- drupal_news/utils/html_norm.py +96 -0
- drupal_news/utils/io_safe.py +107 -0
- drupal_news/utils/md_config_parser.py +116 -0
- drupal_news/utils/providers/__init__.py +1 -0
- drupal_news/utils/providers/anthropic_client.py +72 -0
- drupal_news/utils/providers/deepseek_client.py +78 -0
- drupal_news/utils/providers/gemini_client.py +78 -0
- drupal_news/utils/providers/generic_client.py +139 -0
- drupal_news/utils/providers/grok_client.py +80 -0
- drupal_news/utils/providers/lmstudio_client.py +70 -0
- drupal_news/utils/providers/ollama_client.py +67 -0
- drupal_news/utils/providers/openai_client.py +72 -0
- drupal_news/utils/providers/openrouter_client.py +88 -0
- drupal_news/utils/providers/qwen_client.py +80 -0
- drupal_news/utils/schema.py +87 -0
- drupal_news/utils/timebox.py +93 -0
- drupal_news/validator.py +153 -0
- drupal_news/viewer.py +219 -0
- drupal_news/webpage_reader.py +405 -0
- drupal_news-0.1.0.dist-info/METADATA +349 -0
- drupal_news-0.1.0.dist-info/RECORD +42 -0
- drupal_news-0.1.0.dist-info/WHEEL +5 -0
- drupal_news-0.1.0.dist-info/entry_points.txt +5 -0
- drupal_news-0.1.0.dist-info/licenses/LICENSE.txt +339 -0
- drupal_news-0.1.0.dist-info/top_level.txt +1 -0
drupal_news/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Drupal News Aggregator - Source package."""
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
"""AI summarizer for Drupal Newsletter."""
|
|
2
|
+
import importlib
|
|
3
|
+
from typing import List, Dict, Any, Optional
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import time
|
|
6
|
+
from drupal_news.markdown_converter import items_to_text
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
SUMMARIZER_PROMPT_TEMPLATE = """
|
|
10
|
+
You are a technical writer for the Drupal community. Generate a summary of Drupal news and updates.
|
|
11
|
+
|
|
12
|
+
**Requirements:**
|
|
13
|
+
1. Focus on AI module and news on AI
|
|
14
|
+
2. Each fact MUST include a [source](URL) link
|
|
15
|
+
3. Use clear, factual language - no hype
|
|
16
|
+
4. If no major updates: include "No significant core updates this week"
|
|
17
|
+
5. Present RSS/new modules as a table with columns: URL, Name, Description
|
|
18
|
+
6. Organize by sections: Core Updates, Modules, AI/Automation, Canvas/Admin UI, Planet, D.O. Blog
|
|
19
|
+
|
|
20
|
+
**Timeframe:** Last {timeframe_days} days ({timezone})
|
|
21
|
+
|
|
22
|
+
**Items to summarize:**
|
|
23
|
+
|
|
24
|
+
{items_text}
|
|
25
|
+
|
|
26
|
+
Generate the summary in Markdown format with proper sections and source links.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def load_prompt_template(prompt_file: str = None) -> str:
    """
    Return the prompt template text used for summarization.

    Args:
        prompt_file: Location of a prompt file. When omitted, ``prompt.md``
            in the directory two levels above this module is used.

    Returns:
        The UTF-8 contents of the prompt file when it exists and can be
        read; otherwise the built-in ``SUMMARIZER_PROMPT_TEMPLATE``.
    """
    if prompt_file is None:
        # No explicit file given: look next to the package directory
        prompt_file = Path(__file__).parent.parent / 'prompt.md'

    candidate = Path(prompt_file)

    # Nothing on disk -> built-in template
    if not candidate.exists():
        return SUMMARIZER_PROMPT_TEMPLATE

    try:
        contents = candidate.read_text(encoding='utf-8')
    except Exception as exc:
        # Unreadable file is not fatal; report it and use the built-in template
        print(f"Warning: Could not read {prompt_file}: {exc}")
        print("Using default hardcoded prompt template")
        return SUMMARIZER_PROMPT_TEMPLATE

    return contents
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_provider_client(provider_name: str, client_name: str = None):
    """
    Dynamically load a provider client module.

    The module name is ``client_name`` when given, otherwise
    ``<provider_name>_client``; a ``_client`` suffix is appended when the
    chosen name does not already end with it. The module is looked up inside
    the installed package (``drupal_news.utils.providers``) first and, for
    backward compatibility, under the top-level ``utils.providers`` path
    that this function used previously.

    Args:
        provider_name: Provider name (e.g., 'openai', 'anthropic')
        client_name: Optional client name from config (e.g., 'generic_client')

    Returns:
        Provider client module

    Raises:
        ImportError: If the module cannot be imported from either location.
            The error from the packaged location is chained as the cause.
    """
    # Use client_name if provided, otherwise derive it from provider_name
    module_name = client_name if client_name else f"{provider_name}_client"
    # Append the _client suffix when the configured name lacks it
    if not module_name.endswith('_client'):
        module_name = f"{module_name}_client"

    first_error = None
    for package in ("drupal_news.utils.providers", "utils.providers"):
        try:
            return importlib.import_module(f"{package}.{module_name}")
        except ImportError as e:
            # Keep the error from the packaged location: it is the one that
            # explains a broken install or a missing provider dependency.
            if first_error is None:
                first_error = e

    raise ImportError(
        f"Provider '{provider_name}' (client: {client_name or provider_name}) not found: {first_error}"
    ) from first_error
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def calculate_cost(provider: str, provider_config: Dict[str, Any], summary_result: Dict[str, Any]) -> float:
    """
    Estimate the USD cost of a summary run from token usage and pricing.

    Args:
        provider: Provider name (not used in the calculation)
        provider_config: Provider configuration; its optional "pricing"
            mapping may hold "input_cost_per_1k", "output_cost_per_1k"
            and "per_1k_tokens"
        summary_result: Summary result holding optional "input_tokens",
            "output_tokens" and "tokens" counts

    Returns:
        Cost rounded to 6 decimals; 0.0 when no config or pricing is present
    """
    pricing = provider_config.get("pricing") if provider_config else None
    if not pricing:
        return 0.0

    combined_tokens = summary_result.get("tokens", 0) or 0
    estimate = 0.0

    # Detailed pricing: charge each direction only when both the token
    # count and its rate are known.
    rate_table = (
        ("input_tokens", "input_cost_per_1k"),
        ("output_tokens", "output_cost_per_1k"),
    )
    for usage_key, rate_key in rate_table:
        used = summary_result.get(usage_key)
        rate = pricing.get(rate_key)
        if used is None or rate is None:
            continue
        estimate += (used / 1000.0) * float(rate)

    # Blended pricing applies only when the detailed pricing produced nothing
    flat_rate = pricing.get("per_1k_tokens")
    if estimate == 0.0 and flat_rate is not None:
        estimate += (combined_tokens / 1000.0) * float(flat_rate)

    return round(estimate, 6)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def summarize(
    items: List[Dict[str, Any]],
    provider: str,
    model: str,
    temperature: float,
    timeframe_days: int,
    timezone: str,
    max_items: int = 200,
    chunk_size: int = 200,
    provider_config: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Generate AI summary of items.

    When ``len(items)`` exceeds ``chunk_size`` the items are summarized in
    chunks via ``_summarize_chunked``; otherwise a single request is sent
    with at most ``max_items`` items rendered into the prompt.

    Args:
        items: List of news items
        provider: AI provider name
        model: Model name
        temperature: Temperature setting
        timeframe_days: Number of days covered
        timezone: Timezone name
        max_items: Maximum items rendered into the single-request prompt
            (not passed to the chunked path)
        chunk_size: Item count above which the input is split into chunks
        provider_config: Full provider configuration; "client", "api_url",
            "headers" and "pricing" are read when present

    Returns:
        The provider's result dictionary with "provider" and "model" filled
        in when absent, plus "duration" (seconds) and "cost" (USD estimate)

    Raises:
        ImportError: If the provider client module cannot be loaded
        RuntimeError: If the single-request provider call fails; the
            original exception is chained as the cause
    """
    start_time = time.time()

    # Load provider client (client name comes from provider_config if set)
    client_name = provider_config.get("client") if provider_config else None
    client = get_provider_client(provider, client_name)

    # Load prompt template from file or use default
    template = load_prompt_template()

    def render_prompt(items_text_value: str) -> str:
        return template.format(
            timeframe_days=timeframe_days,
            timezone=timezone,
            items_text=items_text_value
        )

    # Prepare kwargs for provider (prompt injected per request)
    base_kwargs = {
        "model": model,
        "temperature": temperature
    }

    # Add optional provider-specific settings
    if provider_config:
        for optional_key in ("api_url", "headers"):
            if optional_key in provider_config:
                base_kwargs[optional_key] = provider_config[optional_key]

    if len(items) > chunk_size:
        # Chunked path renders one prompt per chunk itself
        summary_result = _summarize_chunked(
            client,
            items,
            render_prompt,
            chunk_size,
            base_kwargs
        )
    else:
        # Build the single prompt only on the path that actually sends it
        request_kwargs = dict(base_kwargs)
        request_kwargs["prompt"] = render_prompt(items_to_text(items, max_items))
        try:
            summary_result = client.generate_summary(**request_kwargs)
        except Exception as e:
            # Chain the cause so the provider's traceback is preserved
            raise RuntimeError(f"Summarization failed: {str(e)}") from e

    # Add metadata
    summary_result.setdefault("provider", provider)
    summary_result.setdefault("model", model)
    summary_result["duration"] = time.time() - start_time
    summary_result["cost"] = calculate_cost(provider, provider_config or {}, summary_result)

    return summary_result
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _summarize_chunked(
    client,
    items: List[Dict[str, Any]],
    render_prompt,
    chunk_size: int,
    base_kwargs: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Summarize items in chunks for large datasets.

    Each chunk of ``chunk_size`` items is rendered into its own prompt and
    sent to ``client.generate_summary``. A failing chunk is reported with a
    warning and skipped; the texts of the remaining chunks are joined with
    blank lines.

    Args:
        client: Provider client module exposing ``generate_summary``
        items: List of news items
        render_prompt: Callable turning an items text into a full prompt
        chunk_size: Number of items per request (must be positive)
        base_kwargs: Keyword arguments shared by every request

    Returns:
        Dictionary with "text", "tokens", "model", "provider", "chunked",
        and "input_tokens"/"output_tokens" when their totals are non-zero

    Raises:
        ValueError: If ``chunk_size`` is not positive
        RuntimeError: If every chunk failed, so that callers (e.g. the
            provider fallback) do not mistake an empty result for success
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")

    summaries = []
    total_tokens = 0
    total_input_tokens = 0
    total_output_tokens = 0
    attempted = 0
    failed = 0
    last_error = None

    for chunk_number, start in enumerate(range(0, len(items), chunk_size), start=1):
        chunk = items[start:start + chunk_size]
        chunk_kwargs = dict(base_kwargs)
        chunk_kwargs["prompt"] = render_prompt(items_to_text(chunk))
        attempted += 1

        try:
            result = client.generate_summary(**chunk_kwargs)
            summaries.append(result.get("text", ""))
            total_tokens += result.get("tokens", 0) or 0
            total_input_tokens += result.get("input_tokens") or 0
            total_output_tokens += result.get("output_tokens") or 0
        except Exception as e:
            # Best effort: one bad chunk must not discard the others
            failed += 1
            last_error = e
            print(f"Warning: Chunk {chunk_number} failed: {e}")

    # Nothing succeeded: surface the failure instead of returning empty text
    if attempted and failed == attempted:
        raise RuntimeError(
            f"All {attempted} chunks failed. Last error: {last_error}"
        ) from last_error

    combined_text = "\n\n".join(filter(None, summaries))

    summary = {
        "text": combined_text,
        "tokens": total_tokens,
        "model": base_kwargs.get("model"),
        # Provider label is derived from the client module's name
        "provider": client.__name__.split(".")[-1].replace("_client", ""),
        "chunked": True
    }

    if total_input_tokens:
        summary["input_tokens"] = total_input_tokens
    if total_output_tokens:
        summary["output_tokens"] = total_output_tokens

    return summary
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def summarize_with_fallback(
    items: List[Dict[str, Any]],
    providers_config: Dict[str, Any],
    default_provider: str,
    timeframe_days: int,
    timezone: str,
    fallback_order: Optional[List[str]] = None
) -> Dict[str, Any]:
    """
    Summarize items, moving on to the next provider whenever one fails.

    The default provider is attempted first, followed by the remaining
    names of the fallback order. Providers without an entry under
    ``providers_config["providers"]`` are skipped.

    Args:
        items: List of news items
        providers_config: Configuration holding a "providers" mapping
        default_provider: Provider to try first
        timeframe_days: Days covered
        timezone: Timezone name
        fallback_order: Optional custom fallback order

    Returns:
        Result of the first provider that succeeds

    Raises:
        RuntimeError: If no provider produced a summary
    """
    order = fallback_order
    if order is None:
        order = ["openai", "anthropic", "ollama", "qwen", "openrouter"]

    # Default provider leads; it is not repeated later in the sequence
    candidates = [default_provider]
    candidates.extend(name for name in order if name != default_provider)

    most_recent_failure = None

    for candidate in candidates:
        settings = providers_config.get("providers", {}).get(candidate)
        if not settings:
            # Not configured: nothing to attempt for this provider
            continue

        try:
            return summarize(
                items=items,
                provider=candidate,
                model=settings["model"],
                temperature=settings.get("temperature", 0.2),
                timeframe_days=timeframe_days,
                timezone=timezone,
                provider_config=settings
            )
        except Exception as exc:
            most_recent_failure = exc
            print(f"Provider {candidate} failed: {exc}")

    # Every candidate was skipped or failed
    raise RuntimeError(f"All providers failed. Last error: {most_recent_failure}")
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
def generate_placeholder_summary(items: List[Dict[str, Any]], timeframe_days: int) -> str:
    """
    Generate placeholder summary for dry-run mode.

    Args:
        items: List of items; those whose 'source_type' is 'rss' are
            counted as new modules
        timeframe_days: Days covered

    Returns:
        Placeholder markdown summary
    """
    # Count without materializing an intermediate list
    rss_count = sum(1 for item in items if item.get('source_type') == 'rss')

    return f"""# Drupal Newsletter Summary (Dry Run)

**Timeframe:** Last {timeframe_days} days

## Summary

This is a placeholder summary for dry-run mode.
Total items collected: {len(items)}

### Core Updates
No significant core updates this week.

### New Modules
{rss_count} new modules found.

### AI and Automation
No major AI updates this week.

---
*This is a dry-run summary. Enable AI provider for actual summaries.*
"""
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"""Cache manager for Drupal Newsletter using SQLite."""
|
|
2
|
+
import sqlite3
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Optional, Dict, Any
|
|
5
|
+
from datetime import datetime, timedelta
|
|
6
|
+
import json
|
|
7
|
+
import hashlib
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class CacheManager:
    """
    Manages persistent caching with SQLite.

    Entries are keyed by the SHA-256 hex digest of the URL and stored as
    JSON text together with ISO-8601 creation and expiry timestamps taken
    from the local clock (``datetime.now()``). Every operation opens its
    own connection and always closes it, even when a statement fails.
    """

    def __init__(self, db_path: str = "./cache/cache.db", ttl_days: int = 21):
        """
        Initialize cache manager.

        Args:
            db_path: Path to SQLite database
            ttl_days: Time-to-live for cache entries in days
        """
        self.db_path = Path(db_path)
        self.ttl_days = ttl_days
        self._init_db()

    def _init_db(self):
        """Create the database directory, the cache table and its index."""
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            cursor.execute("""
                CREATE TABLE IF NOT EXISTS cache (
                    key TEXT PRIMARY KEY,
                    value TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    expires_at TEXT NOT NULL
                )
            """)

            cursor.execute("""
                CREATE INDEX IF NOT EXISTS idx_expires_at ON cache(expires_at)
            """)

            conn.commit()
        finally:
            # Release the connection even if a statement raised
            conn.close()

    def _compute_key(self, url: str) -> str:
        """Compute cache key from URL."""
        return hashlib.sha256(url.encode()).hexdigest()

    def get(self, url: str) -> Optional[Dict[str, Any]]:
        """
        Get cached value for URL.

        An expired entry is deleted as a side effect of the lookup.

        Returns:
            Cached value if found and not expired, None otherwise
            (including when the stored JSON cannot be decoded)
        """
        key = self._compute_key(url)

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("""
                SELECT value, expires_at FROM cache WHERE key = ?
            """, (key,))
            row = cursor.fetchone()
        finally:
            conn.close()

        if not row:
            return None

        value_json, expires_at = row
        expires_dt = datetime.fromisoformat(expires_at)

        # Check expiration
        if datetime.now() > expires_dt:
            self.delete(url)
            return None

        try:
            return json.loads(value_json)
        except json.JSONDecodeError:
            return None

    def set(self, url: str, value: Dict[str, Any]):
        """
        Set cached value for URL, replacing any existing entry.

        Args:
            url: URL to cache
            value: Dictionary value to cache (must be JSON-serializable)
        """
        key = self._compute_key(url)

        # Read the clock once so both timestamps share the same instant
        now = datetime.now()
        created_at = now.isoformat()
        expires_at = (now + timedelta(days=self.ttl_days)).isoformat()

        value_json = json.dumps(value, ensure_ascii=False)

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("""
                INSERT OR REPLACE INTO cache (key, value, created_at, expires_at)
                VALUES (?, ?, ?, ?)
            """, (key, value_json, created_at, expires_at))
            conn.commit()
        finally:
            conn.close()

    def delete(self, url: str):
        """Delete cached value for URL."""
        key = self._compute_key(url)

        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("DELETE FROM cache WHERE key = ?", (key,))
            conn.commit()
        finally:
            conn.close()

    def purge_expired(self) -> int:
        """
        Remove all expired cache entries.

        Returns:
            Number of entries removed
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            # Timestamps are ISO-8601 text, compared as strings by SQLite
            now = datetime.now().isoformat()
            cursor.execute("DELETE FROM cache WHERE expires_at < ?", (now,))

            deleted_count = cursor.rowcount
            conn.commit()
        finally:
            conn.close()

        return deleted_count

    def clear_all(self):
        """Clear all cache entries."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("DELETE FROM cache")
            conn.commit()
        finally:
            conn.close()

    def get_stats(self) -> Dict[str, int]:
        """
        Get cache statistics.

        Returns:
            Dictionary with total, expired, and valid counts
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()

            cursor.execute("SELECT COUNT(*) FROM cache")
            total = cursor.fetchone()[0]

            now = datetime.now().isoformat()
            cursor.execute("SELECT COUNT(*) FROM cache WHERE expires_at < ?", (now,))
            expired = cursor.fetchone()[0]
        finally:
            conn.close()

        return {
            "total": total,
            "expired": expired,
            "valid": total - expired
        }
|
drupal_news/cli.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
CLI entry points for drupal-news package.
|
|
4
|
+
"""
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def main():
    """Entry point of the drupal-news command: run the index module's main and exit with its result."""
    # Imported lazily so loading this module stays cheap
    from drupal_news.index import main as run_index

    exit_status = run_index()
    sys.exit(exit_status)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def scheduler_main():
    """Entry point of the drupal-news-scheduler command: run the scheduler module's main and exit with its result."""
    # Imported lazily so loading this module stays cheap
    from drupal_news.scheduler import main as run_scheduler

    exit_status = run_scheduler()
    sys.exit(exit_status)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def email_main():
    """Entry point of the drupal-news-email command: run the email sender module's main and exit with its result."""
    # Imported lazily so loading this module stays cheap
    from drupal_news.email_sender import main as run_email

    exit_status = run_email()
    sys.exit(exit_status)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def viewer_main():
    """Entry point of the drupal-news-viewer command: print a start-up banner and run the viewer app on port 5000."""
    from drupal_news.viewer import app

    rule = "=" * 60
    banner = (
        rule,
        "Drupal News Viewer",
        rule,
        "Starting server on http://localhost:5000",
        "Press Ctrl+C to stop",
        rule,
    )
    for banner_line in banner:
        print(banner_line)

    # NOTE(review): host='0.0.0.0' presumably binds every network interface,
    # although the banner advertises localhost only - confirm this is intended.
    app.run(debug=False, host='0.0.0.0', port=5000)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
if __name__ == "__main__":
|
|
41
|
+
main()
|