crossref-local 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. crossref_local/__init__.py +38 -16
  2. crossref_local/__main__.py +0 -0
  3. crossref_local/_aio/__init__.py +30 -0
  4. crossref_local/_aio/_impl.py +238 -0
  5. crossref_local/_cache/__init__.py +15 -0
  6. crossref_local/_cache/export.py +100 -0
  7. crossref_local/_cache/utils.py +93 -0
  8. crossref_local/_cache/viz.py +296 -0
  9. crossref_local/_cli/__init__.py +9 -0
  10. crossref_local/_cli/cache.py +179 -0
  11. crossref_local/_cli/cli.py +512 -0
  12. crossref_local/_cli/completion.py +245 -0
  13. crossref_local/_cli/main.py +20 -0
  14. crossref_local/_cli/mcp.py +351 -0
  15. crossref_local/_cli/mcp_server.py +413 -0
  16. crossref_local/_core/__init__.py +58 -0
  17. crossref_local/{api.py → _core/api.py} +130 -36
  18. crossref_local/{citations.py → _core/citations.py} +55 -26
  19. crossref_local/{config.py → _core/config.py} +57 -42
  20. crossref_local/{db.py → _core/db.py} +32 -26
  21. crossref_local/{fts.py → _core/fts.py} +18 -14
  22. crossref_local/{models.py → _core/models.py} +11 -6
  23. crossref_local/{impact_factor → _impact_factor}/__init__.py +0 -0
  24. crossref_local/{impact_factor → _impact_factor}/calculator.py +0 -0
  25. crossref_local/{impact_factor → _impact_factor}/journal_lookup.py +0 -0
  26. crossref_local/_remote/__init__.py +56 -0
  27. crossref_local/_remote/base.py +356 -0
  28. crossref_local/_remote/collections.py +175 -0
  29. crossref_local/_server/__init__.py +140 -0
  30. crossref_local/_server/middleware.py +25 -0
  31. crossref_local/_server/models.py +129 -0
  32. crossref_local/_server/routes_citations.py +98 -0
  33. crossref_local/_server/routes_collections.py +282 -0
  34. crossref_local/_server/routes_compat.py +102 -0
  35. crossref_local/_server/routes_works.py +128 -0
  36. crossref_local/_server/server.py +19 -0
  37. crossref_local/aio.py +30 -206
  38. crossref_local/cache.py +466 -0
  39. crossref_local/cli.py +5 -447
  40. crossref_local/jobs.py +169 -0
  41. crossref_local/mcp_server.py +5 -199
  42. crossref_local/remote.py +5 -261
  43. crossref_local/server.py +5 -349
  44. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/METADATA +88 -24
  45. crossref_local-0.5.0.dist-info/RECORD +47 -0
  46. crossref_local-0.3.1.dist-info/RECORD +0 -20
  47. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/WHEEL +0 -0
  48. {crossref_local-0.3.1.dist-info → crossref_local-0.5.0.dist-info}/entry_points.txt +0 -0
crossref_local/cli.py CHANGED
@@ -1,450 +1,8 @@
1
- """Command-line interface for crossref_local."""
1
+ #!/usr/bin/env python3
2
+ """Backward compatibility: re-export from _cli."""
2
3
 
3
- import click
4
- import json
5
- import logging
6
- import re
7
- import sys
8
- from typing import Optional
4
+ from ._cli import cli, main
9
5
 
10
- from . import search, get, count, info, __version__
6
+ __all__ = ["cli", "main"]
11
7
 
12
- from .impact_factor import ImpactFactorCalculator
13
-
14
- # Suppress noisy warnings from impact_factor module in CLI
15
- logging.getLogger("crossref_local.impact_factor").setLevel(logging.ERROR)
16
-
17
-
18
- def _strip_xml_tags(text: str) -> str:
19
- """Strip XML/JATS tags from abstract text."""
20
- if not text:
21
- return text
22
- # Remove XML tags
23
- text = re.sub(r"<[^>]+>", " ", text)
24
- # Collapse multiple spaces
25
- text = re.sub(r"\s+", " ", text)
26
- return text.strip()
27
-
28
-
29
- class AliasedGroup(click.Group):
30
- """Click group that supports command aliases."""
31
-
32
- def __init__(self, *args, **kwargs):
33
- super().__init__(*args, **kwargs)
34
- self._aliases = {}
35
-
36
- def command(self, *args, aliases=None, **kwargs):
37
- """Decorator that registers aliases for commands."""
38
-
39
- def decorator(f):
40
- cmd = super(AliasedGroup, self).command(*args, **kwargs)(f)
41
- if aliases:
42
- for alias in aliases:
43
- self._aliases[alias] = cmd.name
44
- return cmd
45
-
46
- return decorator
47
-
48
- def get_command(self, ctx, cmd_name):
49
- """Resolve aliases to actual commands."""
50
- cmd_name = self._aliases.get(cmd_name, cmd_name)
51
- return super().get_command(ctx, cmd_name)
52
-
53
- def format_commands(self, ctx, formatter):
54
- """Format commands with aliases shown inline."""
55
- commands = []
56
- for subcommand in self.list_commands(ctx):
57
- cmd = self.get_command(ctx, subcommand)
58
- if cmd is None or cmd.hidden:
59
- continue
60
-
61
- # Find aliases for this command
62
- aliases = [a for a, c in self._aliases.items() if c == subcommand]
63
- if aliases:
64
- name = f"{subcommand} ({', '.join(aliases)})"
65
- else:
66
- name = subcommand
67
-
68
- help_text = cmd.get_short_help_str(limit=50)
69
- commands.append((name, help_text))
70
-
71
- if commands:
72
- with formatter.section("Commands"):
73
- formatter.write_dl(commands)
74
-
75
-
76
- CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}
77
-
78
-
79
- @click.group(cls=AliasedGroup, context_settings=CONTEXT_SETTINGS)
80
- @click.version_option(version=__version__, prog_name="crossref-local")
81
- @click.option(
82
- "--remote", "-r", is_flag=True, help="Use remote API instead of local database"
83
- )
84
- @click.option(
85
- "--api-url",
86
- envvar="CROSSREF_LOCAL_API",
87
- help="API URL for remote mode (default: auto-detect)",
88
- )
89
- @click.pass_context
90
- def cli(ctx, remote: bool, api_url: str):
91
- """Local CrossRef database with 167M+ works and full-text search.
92
-
93
- Supports both local database access and remote API mode.
94
-
95
- \b
96
- Local mode (default if database found):
97
- crossref-local search "machine learning"
98
-
99
- \b
100
- Remote mode (via SSH tunnel):
101
- ssh -L 3333:127.0.0.1:3333 nas # First, create tunnel
102
- crossref-local --remote search "machine learning"
103
- """
104
- from .config import Config
105
-
106
- ctx.ensure_object(dict)
107
-
108
- if api_url:
109
- Config.set_api_url(api_url)
110
- elif remote:
111
- Config.set_mode("remote")
112
-
113
-
114
- def _get_if_fast(db, issn: str, cache: dict) -> Optional[float]:
115
- """Fast IF lookup from pre-computed OpenAlex data."""
116
- if issn in cache:
117
- return cache[issn]
118
- row = db.fetchone(
119
- "SELECT two_year_mean_citedness FROM journals_openalex WHERE issns LIKE ?",
120
- (f"%{issn}%",)
121
- )
122
- cache[issn] = row["two_year_mean_citedness"] if row else None
123
- return cache[issn]
124
-
125
-
126
- @cli.command("search", aliases=["s"], context_settings=CONTEXT_SETTINGS)
127
- @click.argument("query")
128
- @click.option("-n", "--number", "limit", default=10, show_default=True, help="Number of results")
129
- @click.option("-o", "--offset", default=0, help="Skip first N results")
130
- @click.option("-a", "--abstracts", is_flag=True, help="Show abstracts")
131
- @click.option("-A", "--authors", is_flag=True, help="Show authors")
132
- @click.option("-if", "--impact-factor", "with_if", is_flag=True, help="Show journal impact factor")
133
- @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
134
- def search_cmd(query: str, limit: int, offset: int, abstracts: bool, authors: bool, with_if: bool, as_json: bool):
135
- """Search for works by title, abstract, or authors."""
136
- from .db import get_db
137
- results = search(query, limit=limit, offset=offset)
138
-
139
- # Cache for fast IF lookups
140
- if_cache = {}
141
- db = get_db() if with_if else None
142
-
143
- if as_json:
144
- output = {
145
- "query": results.query,
146
- "total": results.total,
147
- "elapsed_ms": results.elapsed_ms,
148
- "works": [w.to_dict() for w in results.works],
149
- }
150
- click.echo(json.dumps(output, indent=2))
151
- else:
152
- click.echo(f"Found {results.total:,} matches in {results.elapsed_ms:.1f}ms\n")
153
- for i, work in enumerate(results.works, start=offset + 1):
154
- title = _strip_xml_tags(work.title) if work.title else "Untitled"
155
- year = f"({work.year})" if work.year else ""
156
- click.echo(f"{i}. {title} {year}")
157
- click.echo(f" DOI: {work.doi}")
158
- if authors and work.authors:
159
- authors_str = ", ".join(work.authors[:5])
160
- if len(work.authors) > 5:
161
- authors_str += f" et al. ({len(work.authors)} total)"
162
- click.echo(f" Authors: {authors_str}")
163
- if work.journal:
164
- journal_line = f" Journal: {work.journal}"
165
- # Fast IF lookup from pre-computed table
166
- if with_if and work.issn:
167
- impact_factor = _get_if_fast(db, work.issn, if_cache)
168
- if impact_factor is not None:
169
- journal_line += f" (IF: {impact_factor:.2f}, OpenAlex)"
170
- click.echo(journal_line)
171
- if abstracts and work.abstract:
172
- # Strip XML tags and truncate
173
- abstract = _strip_xml_tags(work.abstract)
174
- if len(abstract) > 500:
175
- abstract = abstract[:500] + "..."
176
- click.echo(f" Abstract: {abstract}")
177
- click.echo()
178
-
179
-
180
- @cli.command("get", aliases=["g"], context_settings=CONTEXT_SETTINGS)
181
- @click.argument("doi")
182
- @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
183
- @click.option("--citation", is_flag=True, help="Output as citation")
184
- def get_cmd(doi: str, as_json: bool, citation: bool):
185
- """Get a work by DOI."""
186
- work = get(doi)
187
-
188
- if work is None:
189
- click.echo(f"DOI not found: {doi}", err=True)
190
- sys.exit(1)
191
-
192
- if as_json:
193
- click.echo(json.dumps(work.to_dict(), indent=2))
194
- elif citation:
195
- click.echo(work.citation())
196
- else:
197
- click.echo(f"Title: {work.title}")
198
- click.echo(f"Authors: {', '.join(work.authors)}")
199
- click.echo(f"Year: {work.year}")
200
- click.echo(f"Journal: {work.journal}")
201
- click.echo(f"DOI: {work.doi}")
202
- if work.citation_count:
203
- click.echo(f"Citations: {work.citation_count}")
204
-
205
-
206
- @cli.command("count", aliases=["c"], context_settings=CONTEXT_SETTINGS)
207
- @click.argument("query")
208
- def count_cmd(query: str):
209
- """Count matching works."""
210
- n = count(query)
211
- click.echo(f"{n:,}")
212
-
213
-
214
- @cli.command("info", aliases=["i"], context_settings=CONTEXT_SETTINGS)
215
- @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
216
- def info_cmd(as_json: bool):
217
- """Show database/API information."""
218
- db_info = info()
219
-
220
- if as_json:
221
- click.echo(json.dumps(db_info, indent=2))
222
- else:
223
- mode = db_info.get("mode", "local")
224
- if mode == "remote":
225
- click.echo("CrossRef Local API (Remote)")
226
- click.echo("-" * 40)
227
- click.echo(f"API URL: {db_info.get('api_url', 'unknown')}")
228
- click.echo(f"Status: {db_info.get('status', 'unknown')}")
229
- else:
230
- click.echo("CrossRef Local Database")
231
- click.echo("-" * 40)
232
- click.echo(f"Database: {db_info.get('db_path', 'unknown')}")
233
- click.echo(f"Works: {db_info.get('works', 0):,}")
234
- click.echo(f"FTS indexed: {db_info.get('fts_indexed', 0):,}")
235
- click.echo(f"Citations: {db_info.get('citations', 0):,}")
236
-
237
-
238
- @cli.command("impact-factor", aliases=["if"], context_settings=CONTEXT_SETTINGS)
239
- @click.argument("journal")
240
- @click.option("-y", "--year", default=2023, help="Target year")
241
- @click.option("-w", "--window", default=2, help="Citation window years")
242
- @click.option("--json", "as_json", is_flag=True, help="Output as JSON")
243
- def impact_factor_cmd(journal: str, year: int, window: int, as_json: bool):
244
- """Calculate impact factor for a journal."""
245
- with ImpactFactorCalculator() as calc:
246
- result = calc.calculate_impact_factor(
247
- journal_identifier=journal,
248
- target_year=year,
249
- window_years=window,
250
- )
251
-
252
- if as_json:
253
- click.echo(json.dumps(result, indent=2))
254
- else:
255
- click.echo(f"Journal: {result['journal']}")
256
- click.echo(f"Year: {result['target_year']}")
257
- click.echo(f"Window: {result['window_range']}")
258
- click.echo(f"Articles: {result['total_articles']:,}")
259
- click.echo(f"Citations: {result['total_citations']:,}")
260
- click.echo(f"Impact Factor: {result['impact_factor']:.3f}")
261
-
262
-
263
- @cli.command(context_settings=CONTEXT_SETTINGS)
264
- def setup():
265
- """Check setup status and configuration."""
266
- from .config import DEFAULT_DB_PATHS, DEFAULT_API_URLS
267
- import os
268
-
269
- click.echo("CrossRef Local - Setup Status")
270
- click.echo("=" * 50)
271
- click.echo()
272
-
273
- # Check environment variables
274
- click.echo("Environment Variables:")
275
- env_db = os.environ.get("CROSSREF_LOCAL_DB")
276
- env_api = os.environ.get("CROSSREF_LOCAL_API")
277
- env_mode = os.environ.get("CROSSREF_LOCAL_MODE")
278
-
279
- if env_db:
280
- status = "OK" if os.path.exists(env_db) else "NOT FOUND"
281
- click.echo(f" CROSSREF_LOCAL_DB: {env_db} ({status})")
282
- else:
283
- click.echo(" CROSSREF_LOCAL_DB: (not set)")
284
-
285
- if env_api:
286
- click.echo(f" CROSSREF_LOCAL_API: {env_api}")
287
- else:
288
- click.echo(" CROSSREF_LOCAL_API: (not set)")
289
-
290
- if env_mode:
291
- click.echo(f" CROSSREF_LOCAL_MODE: {env_mode}")
292
-
293
- click.echo()
294
-
295
- # Check default database paths
296
- click.echo("Local Database Locations:")
297
- db_found = None
298
- for path in DEFAULT_DB_PATHS:
299
- if path.exists():
300
- click.echo(f" [OK] {path}")
301
- if db_found is None:
302
- db_found = path
303
- else:
304
- click.echo(f" [ ] {path}")
305
-
306
- click.echo()
307
-
308
- # Check remote API endpoints
309
- click.echo("Remote API Endpoints:")
310
- api_found = None
311
- for url in DEFAULT_API_URLS:
312
- try:
313
- import urllib.request
314
-
315
- req = urllib.request.Request(f"{url}/health", method="GET")
316
- with urllib.request.urlopen(req, timeout=3) as resp:
317
- if resp.status == 200:
318
- click.echo(f" [OK] {url}")
319
- if api_found is None:
320
- api_found = url
321
- else:
322
- click.echo(f" [ ] {url}")
323
- except Exception:
324
- click.echo(f" [ ] {url}")
325
-
326
- click.echo()
327
-
328
- # Summary and recommendations
329
- if db_found:
330
- click.echo(f"Local database: {db_found}")
331
- try:
332
- db_info = info()
333
- click.echo(f" Works: {db_info.get('works', 0):,}")
334
- click.echo(f" FTS indexed: {db_info.get('fts_indexed', 0):,}")
335
- except Exception as e:
336
- click.echo(f" Error: {e}", err=True)
337
- click.echo()
338
- click.echo("Ready! Try:")
339
- click.echo(' crossref-local search "machine learning"')
340
- elif api_found:
341
- click.echo(f"Remote API available: {api_found}")
342
- click.echo()
343
- click.echo("Ready! Try:")
344
- click.echo(' crossref-local --remote search "machine learning"')
345
- click.echo()
346
- click.echo("Or set environment:")
347
- click.echo(" export CROSSREF_LOCAL_MODE=remote")
348
- else:
349
- click.echo("No database or API found!")
350
- click.echo()
351
- click.echo("Options:")
352
- click.echo(" 1. Local database:")
353
- click.echo(" export CROSSREF_LOCAL_DB=/path/to/crossref.db")
354
- click.echo()
355
- click.echo(" 2. Remote API (via SSH tunnel):")
356
- click.echo(" ssh -L 3333:127.0.0.1:3333 your-nas")
357
- click.echo(" crossref-local --remote search 'query'")
358
-
359
-
360
- @cli.command(context_settings=CONTEXT_SETTINGS)
361
- @click.option(
362
- "-t",
363
- "--transport",
364
- type=click.Choice(["stdio", "sse", "http"]),
365
- default="stdio",
366
- help="Transport protocol (stdio for Claude Desktop)",
367
- )
368
- @click.option("--host", default="localhost", help="Host for HTTP/SSE transport")
369
- @click.option("--port", default=8082, type=int, help="Port for HTTP/SSE transport")
370
- def serve(transport: str, host: str, port: int):
371
- """Run MCP server for Claude integration.
372
-
373
- \b
374
- Claude Desktop configuration (claude_desktop_config.json):
375
- {
376
- "mcpServers": {
377
- "crossref": {
378
- "command": "crossref-local",
379
- "args": ["serve"]
380
- }
381
- }
382
- }
383
-
384
- \b
385
- Or with explicit path:
386
- {
387
- "mcpServers": {
388
- "crossref": {
389
- "command": "python",
390
- "args": ["-m", "crossref_local.mcp_server"]
391
- }
392
- }
393
- }
394
- """
395
- try:
396
- from .mcp_server import run_server
397
- except ImportError:
398
- click.echo(
399
- "MCP server requires fastmcp. Install with:\n"
400
- " pip install crossref-local[mcp]",
401
- err=True,
402
- )
403
- sys.exit(1)
404
-
405
- run_server(transport=transport, host=host, port=port)
406
-
407
-
408
- @cli.command(context_settings=CONTEXT_SETTINGS)
409
- @click.option("--host", default="0.0.0.0", help="Host to bind")
410
- @click.option("--port", default=3333, type=int, help="Port to listen on")
411
- def api(host: str, port: int):
412
- """Run HTTP API server with FTS5 search.
413
-
414
- \b
415
- This runs a FastAPI server that provides proper full-text search
416
- using FTS5 index across all 167M+ papers.
417
-
418
- \b
419
- Example:
420
- crossref-local api # Run on 0.0.0.0:3333
421
- crossref-local api --port 8080 # Custom port
422
-
423
- \b
424
- Then from a client:
425
- curl "http://localhost:3333/search?q=CRISPR&limit=10"
426
- curl "http://localhost:3333/get/10.1038/nature12373"
427
- """
428
- try:
429
- from .server import run_server
430
- except ImportError:
431
- click.echo(
432
- "API server requires fastapi and uvicorn. Install with:\n"
433
- " pip install fastapi uvicorn",
434
- err=True,
435
- )
436
- sys.exit(1)
437
-
438
- click.echo(f"Starting CrossRef Local API server on {host}:{port}")
439
- click.echo(f"Search endpoint: http://{host}:{port}/search?q=<query>")
440
- click.echo(f"Docs: http://{host}:{port}/docs")
441
- run_server(host=host, port=port)
442
-
443
-
444
- def main():
445
- """Entry point for CLI."""
446
- cli()
447
-
448
-
449
- if __name__ == "__main__":
450
- main()
8
+ # EOF
crossref_local/jobs.py ADDED
@@ -0,0 +1,169 @@
1
+ #!/usr/bin/env python3
2
+ # Timestamp: 2026-01-29
3
+ """Simple job/queue system for batch operations."""
4
+
5
+ import json as _json
6
+ import time as _time
7
+ import uuid as _uuid
8
+ from dataclasses import dataclass as _dataclass
9
+ from dataclasses import field as _field
10
+ from pathlib import Path as _Path
11
+ from typing import Any as _Any
12
+ from typing import Callable as _Callable
13
+ from typing import Optional as _Optional
14
+
15
+ __all__ = ["create", "get", "list_jobs", "run"]
16
+
17
+ # Default jobs directory
18
+ _JOBS_DIR = _Path.home() / ".crossref_local" / "jobs"
19
+
20
+
21
@_dataclass
class Job:
    """A batch job with progress tracking.

    A job is a list of string items plus bookkeeping about which items have
    been processed successfully (``completed``) and which raised an error
    (``failed``). Instances round-trip through plain dicts via
    :meth:`to_dict` / :meth:`from_dict`.
    """

    id: str
    items: list[str]  # e.g., DOIs to process
    completed: list[str] = _field(default_factory=list)
    failed: dict[str, str] = _field(default_factory=dict)  # item -> error
    status: str = "pending"  # pending, running, completed, failed
    created_at: float = _field(default_factory=_time.time)
    updated_at: float = _field(default_factory=_time.time)
    metadata: dict[str, _Any] = _field(default_factory=dict)

    @property
    def pending(self) -> list[str]:
        """Items not yet processed, in their original order.

        An item counts as processed once it appears in either ``completed``
        or ``failed``.
        """
        done = set(self.completed).union(self.failed)
        return [item for item in self.items if item not in done]

    @property
    def progress(self) -> float:
        """Progress as percentage (0-100).

        Only ``completed`` items count; failed items do not advance the
        percentage. A job without items reports 100.0.
        """
        if not self.items:
            return 100.0
        return len(self.completed) / len(self.items) * 100

    def to_dict(self) -> dict:
        """Return the job as a plain dict.

        The containers are shallow copies, so mutating the returned dict
        does not change this job.
        """
        return {
            "id": self.id,
            "items": list(self.items),
            "completed": list(self.completed),
            "failed": dict(self.failed),
            "status": self.status,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "metadata": dict(self.metadata),
        }

    @classmethod
    def from_dict(cls, data: dict) -> "Job":
        """Build a job from a dict such as the one produced by :meth:`to_dict`.

        Keys that are not fields of this dataclass are ignored, so a dict
        carrying extra keys can still be loaded.

        Raises:
            TypeError: if a required field (``id``, ``items``) is missing or
                *data* is not a mapping.
        """
        # Local import: the module only imports `dataclass` and `field`.
        from dataclasses import fields as _fields

        known = {f.name for f in _fields(cls)}
        return cls(**{key: data[key] for key in known if key in data})
62
+
63
+
64
+ class JobQueue:
65
+ """Manages job persistence and execution."""
66
+
67
+ def __init__(self, jobs_dir: _Optional[_Path] = None):
68
+ self.jobs_dir = _Path(jobs_dir) if jobs_dir else _JOBS_DIR
69
+ self.jobs_dir.mkdir(parents=True, exist_ok=True)
70
+
71
+ def _job_path(self, job_id: str) -> _Path:
72
+ return self.jobs_dir / f"{job_id}.json"
73
+
74
+ def save(self, job: Job) -> None:
75
+ """Save job to disk."""
76
+ job.updated_at = _time.time()
77
+ self._job_path(job.id).write_text(_json.dumps(job.to_dict(), indent=2))
78
+
79
+ def load(self, job_id: str) -> _Optional[Job]:
80
+ """Load job from disk."""
81
+ path = self._job_path(job_id)
82
+ if not path.exists():
83
+ return None
84
+ return Job.from_dict(_json.loads(path.read_text()))
85
+
86
+ def create(self, items: list[str], **metadata) -> Job:
87
+ """Create a new job."""
88
+ job = Job(id=str(_uuid.uuid4())[:8], items=items, metadata=metadata)
89
+ self.save(job)
90
+ return job
91
+
92
+ def list(self) -> list[Job]:
93
+ """List all jobs."""
94
+ jobs = []
95
+ for path in self.jobs_dir.glob("*.json"):
96
+ try:
97
+ jobs.append(Job.from_dict(_json.loads(path.read_text())))
98
+ except Exception:
99
+ continue
100
+ return sorted(jobs, key=lambda j: j.created_at, reverse=True)
101
+
102
+ def delete(self, job_id: str) -> bool:
103
+ """Delete a job."""
104
+ path = self._job_path(job_id)
105
+ if path.exists():
106
+ path.unlink()
107
+ return True
108
+ return False
109
+
110
+ def run(
111
+ self,
112
+ job: Job,
113
+ processor: _Callable[[str], _Any],
114
+ on_progress: _Optional[_Callable[[Job], None]] = None,
115
+ ) -> Job:
116
+ """Run a job with a processor function."""
117
+ job.status = "running"
118
+ self.save(job)
119
+
120
+ for item in job.pending:
121
+ try:
122
+ processor(item)
123
+ job.completed.append(item)
124
+ except Exception as e:
125
+ job.failed[item] = str(e)
126
+ self.save(job)
127
+ if on_progress:
128
+ on_progress(job)
129
+
130
+ job.status = "completed" if not job.failed else "failed"
131
+ self.save(job)
132
+ return job
133
+
134
+
135
+ # Module-level convenience functions
136
# Shared queue instance; stays None until a helper below first needs it.
_queue = None


def _get_queue() -> JobQueue:
    """Return the shared JobQueue, creating it on first use."""
    global _queue
    if _queue is not None:
        return _queue
    _queue = JobQueue()
    return _queue
144
+
145
+
146
def create(items: list[str], **metadata) -> Job:
    """Create a new job on the shared queue.

    Keyword arguments are forwarded as the job's metadata.
    """
    queue = _get_queue()
    return queue.create(items, **metadata)
149
+
150
+
151
def get(job_id: str) -> _Optional[Job]:
    """Get a job by ID from the shared queue."""
    queue = _get_queue()
    return queue.load(job_id)
154
+
155
+
156
def list_jobs() -> list[Job]:
    """List all jobs known to the shared queue."""
    queue = _get_queue()
    return queue.list()
159
+
160
+
161
def run(
    job_id: str,
    processor: _Callable[[str], _Any],
    on_progress: _Optional[_Callable[[Job], None]] = None,
) -> Job:
    """Run or resume a job.

    Args:
        job_id: ID of a previously created job.
        processor: Called with each item of the job that is still pending.
        on_progress: Optional callback forwarded to ``JobQueue.run``.

    Returns:
        The job as returned by ``JobQueue.run``.

    Raises:
        ValueError: if no job with *job_id* exists.
    """
    job = get(job_id)
    if job is None:
        raise ValueError(f"Job not found: {job_id}")
    return _get_queue().run(job, processor, on_progress)
167
+
168
+
169
+ # EOF