citesentry 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. citesentry-0.1.1/.claude/settings.local.json +18 -0
  2. citesentry-0.1.1/.github/workflows/publish.yml +26 -0
  3. citesentry-0.1.1/CLAUDE.md +36 -0
  4. citesentry-0.1.1/PKG-INFO +201 -0
  5. citesentry-0.1.1/README.md +173 -0
  6. citesentry-0.1.1/citesentry/__init__.py +3 -0
  7. citesentry-0.1.1/citesentry/cache.py +70 -0
  8. citesentry-0.1.1/citesentry/checks/__init__.py +0 -0
  9. citesentry-0.1.1/citesentry/checks/existence.py +219 -0
  10. citesentry-0.1.1/citesentry/checks/relevance.py +176 -0
  11. citesentry-0.1.1/citesentry/checks/url_liveness.py +143 -0
  12. citesentry-0.1.1/citesentry/cli.py +193 -0
  13. citesentry-0.1.1/citesentry/config.py +46 -0
  14. citesentry-0.1.1/citesentry/core/__init__.py +4 -0
  15. citesentry-0.1.1/citesentry/core/cascade.py +17 -0
  16. citesentry-0.1.1/citesentry/core/engine.py +94 -0
  17. citesentry-0.1.1/citesentry/core/verdict.py +56 -0
  18. citesentry-0.1.1/citesentry/llm/__init__.py +3 -0
  19. citesentry-0.1.1/citesentry/llm/base.py +13 -0
  20. citesentry-0.1.1/citesentry/llm/deepseek.py +41 -0
  21. citesentry-0.1.1/citesentry/llm/mcp_sampling.py +17 -0
  22. citesentry-0.1.1/citesentry/mcp_server.py +156 -0
  23. citesentry-0.1.1/citesentry/models.py +81 -0
  24. citesentry-0.1.1/citesentry/parse/__init__.py +3 -0
  25. citesentry-0.1.1/citesentry/parse/bibtex.py +87 -0
  26. citesentry-0.1.1/citesentry/parse/csl_json.py +87 -0
  27. citesentry-0.1.1/citesentry/parse/detect.py +100 -0
  28. citesentry-0.1.1/citesentry/parse/doi_list.py +20 -0
  29. citesentry-0.1.1/citesentry/parse/nbib.py +87 -0
  30. citesentry-0.1.1/citesentry/parse/pdf_refs.py +47 -0
  31. citesentry-0.1.1/citesentry/parse/plaintext.py +329 -0
  32. citesentry-0.1.1/citesentry/parse/ris.py +75 -0
  33. citesentry-0.1.1/citesentry/sources/__init__.py +3 -0
  34. citesentry-0.1.1/citesentry/sources/arxiv.py +111 -0
  35. citesentry-0.1.1/citesentry/sources/base.py +19 -0
  36. citesentry-0.1.1/citesentry/sources/crossref.py +97 -0
  37. citesentry-0.1.1/citesentry/sources/domain/__init__.py +0 -0
  38. citesentry-0.1.1/citesentry/sources/domain/dblp.py +86 -0
  39. citesentry-0.1.1/citesentry/sources/domain/pubmed.py +153 -0
  40. citesentry-0.1.1/citesentry/sources/openalex.py +99 -0
  41. citesentry-0.1.1/citesentry/sources/semantic_scholar.py +73 -0
  42. citesentry-0.1.1/citesentry/sources/unpaywall.py +46 -0
  43. citesentry-0.1.1/pyproject.toml +49 -0
  44. citesentry-0.1.1/refsift_build_plan.md +407 -0
  45. citesentry-0.1.1/tests/__init__.py +0 -0
  46. citesentry-0.1.1/tests/fixtures/apa_style.txt +9 -0
  47. citesentry-0.1.1/tests/fixtures/fabricated.bib +37 -0
  48. citesentry-0.1.1/tests/fixtures/ieee_style.txt +9 -0
  49. citesentry-0.1.1/tests/fixtures/known_real.bib +39 -0
  50. citesentry-0.1.1/tests/fixtures/metadata_mismatch.bib +41 -0
  51. citesentry-0.1.1/tests/fixtures/pdf_copypaste.txt +12 -0
  52. citesentry-0.1.1/tests/fixtures/sample.json +24 -0
  53. citesentry-0.1.1/tests/fixtures/sample.nbib +26 -0
  54. citesentry-0.1.1/tests/fixtures/sample.ris +20 -0
  55. citesentry-0.1.1/tests/fixtures/urls.bib +36 -0
  56. citesentry-0.1.1/tests/test_checks.py +127 -0
  57. citesentry-0.1.1/tests/test_engine.py +129 -0
  58. citesentry-0.1.1/tests/test_parse.py +176 -0
  59. citesentry-0.1.1/tests/test_sources.py +148 -0
  60. citesentry-0.1.1/uv.lock +1879 -0
@@ -0,0 +1,18 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(pip install *)",
5
+ "Bash(pip index *)",
6
+ "Bash(python *)",
7
+ "Bash(refsift check-one *)",
8
+ "Bash(python3 -c \"import json,sys; d=json.load\\(sys.stdin\\); print\\(d['overall_verdict'], d['reference']['title']\\)\")",
9
+ "Bash(refsift check *)",
10
+ "Bash(python3 *)",
11
+ "Bash(xargs sed -i '' 's/from refsift\\\\./from citesentry./g; s/from refsift import/from citesentry import/g; s/import refsift\\\\./import citesentry./g')"
12
+ ]
13
+ },
14
+ "enableAllProjectMcpServers": true,
15
+ "enabledMcpjsonServers": [
16
+ "semantic-scholar-mcp"
17
+ ]
18
+ }
@@ -0,0 +1,26 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+
8
+ jobs:
9
+ build-and-publish:
10
+ runs-on: ubuntu-latest
11
+ environment: pypi
12
+ permissions:
13
+ id-token: write # required for trusted publishing
14
+
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - uses: astral-sh/setup-uv@v4
19
+ with:
20
+ version: "latest"
21
+
22
+ - name: Build package
23
+ run: uv build
24
+
25
+ - name: Publish to PyPI
26
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,36 @@
1
+ # citesentry — Claude Code session notes
2
+
3
+ ## Guardrails (non-negotiable)
4
+
5
+ - Never label a reference "fake" or "fraudulent" — only "could not verify / needs review."
6
+ - Never bypass CAPTCHA or bot-protection; classify as SKIPPED.
7
+ - Never hardcode API keys; read from env; degrade gracefully when absent.
8
+ - Core (`citesentry/core/`, `citesentry/checks/`, `citesentry/sources/`, `citesentry/parse/`) must never import Typer, Rich, or MCP.
9
+ - MCP server stdout must stay clean (JSON-RPC stream). Log to stderr only.
10
+ - Always send `mailto` to OpenAlex/Crossref; respect rate limits; cache aggressively.
11
+ - Report all counts honestly: checked, skipped, errored — never silently drop.
12
+
13
+ ## Architecture
14
+
15
+ ```
16
+ ┌──────────────────────────────┐
17
+ bib/pdf/txt → │ citesentry.core (library) │ → VerificationReport (pydantic)
18
+ └──────────────────────────────┘
19
+ ▲ ▲
20
+ │ │
21
+ citesentry.cli citesentry.mcp_server
22
+ (Typer + Rich) (FastMCP / stdio)
23
+ ```
24
+
25
+ If verification logic ever appears inside `cli.py` or `mcp_server.py`, that is a bug — move it to core.
26
+
27
+ ## LLM strategy
28
+
29
+ - MCP server: uses MCP sampling (`ctx.sample()`) — no API key needed.
30
+ - CLI: uses DeepSeek via OpenAI-compatible endpoint; requires `DEEPSEEK_API_KEY`.
31
+ - `--no-llm` skips relevance checks entirely; tool remains fully usable.
32
+
33
+ ## Verdict wording
34
+
35
+ `NOT_FOUND` → "could not verify — likely fabricated, needs manual review"
36
+ Never use the word "fake."
@@ -0,0 +1,201 @@
1
+ Metadata-Version: 2.4
2
+ Name: citesentry
3
+ Version: 0.1.1
4
+ Summary: Citation verification tool: existence, URL liveness, and content relevance checks
5
+ License: MIT
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: bibtexparser>=1.4
8
+ Requires-Dist: httpx>=0.27
9
+ Requires-Dist: mcp[cli]>=1.0
10
+ Requires-Dist: pdfminer-six>=20221105
11
+ Requires-Dist: platformdirs>=4
12
+ Requires-Dist: pydantic>=2
13
+ Requires-Dist: rapidfuzz>=3
14
+ Requires-Dist: rich>=13
15
+ Requires-Dist: rispy>=0.9
16
+ Requires-Dist: typer>=0.12
17
+ Provides-Extra: cli-llm
18
+ Requires-Dist: openai>=1.0; extra == 'cli-llm'
19
+ Provides-Extra: dev
20
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
21
+ Requires-Dist: pytest>=8; extra == 'dev'
22
+ Requires-Dist: respx>=0.21; extra == 'dev'
23
+ Requires-Dist: ruff>=0.4; extra == 'dev'
24
+ Provides-Extra: domain
25
+ Provides-Extra: pdf
26
+ Requires-Dist: refextract; extra == 'pdf'
27
+ Description-Content-Type: text/markdown
28
+
29
+ # CiteSentry
30
+
31
+ [![PyPI](https://img.shields.io/pypi/v/citesentry)](https://pypi.org/project/citesentry/)
32
+ [![Python](https://img.shields.io/pypi/pyversions/citesentry)](https://pypi.org/project/citesentry/)
33
+ [![CI](https://github.com/mkassaf/CiteSentry/actions/workflows/publish.yml/badge.svg)](https://github.com/mkassaf/CiteSentry/actions/workflows/publish.yml)
34
+
35
+ Citation verification tool: check whether references actually exist, whether their URLs are live, and whether the content is relevant to the citation.
36
+
37
+ ## What it does
38
+
39
+ Three checks per reference:
40
+
41
+ 1. **Existence** — resolves against OpenAlex, Crossref, Semantic Scholar, arXiv, and domain-specific databases (PubMed for biomedical, DBLP for CS)
42
+ 2. **URL liveness** — HTTP HEAD/GET check; classifies 2xx/4xx/timeout/bot-protection
43
+ 3. **Content relevance** — LLM-backed check comparing fetched content to the cited title/topic (requires `DEEPSEEK_API_KEY` for CLI use)
44
+
45
+ Verdicts: `VERIFIED`, `METADATA_MISMATCH`, `DEAD_URL`, `CONTENT_DRIFT`, `NOT_FOUND`, `UNRESOLVABLE`.
46
+
47
+ `NOT_FOUND` means "could not verify — likely fabricated, needs manual review." Never "fake."
48
+
49
+ ## Install
50
+
51
+ ```bash
52
+ pip install citesentry # basic install
53
+ pip install "citesentry[cli-llm]" # + DeepSeek for relevance checks
54
+ ```
55
+
56
+ For development:
57
+
58
+ ```bash
59
+ git clone https://github.com/mkassaf/CiteSentry
60
+ cd CiteSentry
61
+ pip install -e ".[dev]"
62
+ ```
63
+
64
+ ## CLI usage
65
+
66
+ ```bash
67
+ # Check a BibTeX file
68
+ citesentry check refs.bib
69
+
70
+ # Check a RIS/CSL-JSON/NBIB/plaintext file
71
+ citesentry check refs.ris
72
+ citesentry check refs.json
73
+
74
+ # Read from stdin
75
+ cat refs.txt | citesentry check -
76
+
77
+ # Single ad-hoc reference
78
+ citesentry check-one "Vaswani et al. (2017). Attention is all you need. NeurIPS."
79
+
80
+ # Output formats: table (default), json, md
81
+ citesentry check refs.bib --format json
82
+ citesentry check refs.bib --format md > report.md
83
+
84
+ # Skip checks
85
+ citesentry check refs.bib --no-llm # skip relevance (no API key needed)
86
+ citesentry check refs.bib --no-url # skip URL liveness
87
+
88
+ # Domain adapters (auto by default)
89
+ citesentry check refs.bib --domain pubmed # force PubMed only
90
+ citesentry check refs.bib --domain none # disable domain adapters
91
+
92
+ # Override plaintext style detection
93
+ citesentry check refs.txt --style ieee
94
+ ```
95
+
96
+ Exit code is non-zero if any reference is `NOT_FOUND` or `DEAD_URL` (useful in CI).
97
+
98
+ ## MCP server (Claude Desktop / Claude Code)
99
+
100
+ Add to your `claude_desktop_config.json`:
101
+
102
+ ```json
103
+ {
104
+ "mcpServers": {
105
+ "citesentry": {
106
+ "command": "citesentry-mcp",
107
+ "env": {
108
+ "CITESENTRY_MAILTO": "you@example.com",
109
+ "DEEPSEEK_API_KEY": "sk-..."
110
+ }
111
+ }
112
+ }
113
+ }
114
+ ```
115
+
116
+ Or with `uvx` (no prior install needed):
117
+
118
+ ```json
119
+ {
120
+ "mcpServers": {
121
+ "citesentry": {
122
+ "command": "uvx",
123
+ "args": ["--from", "citesentry", "citesentry-mcp"],
124
+ "env": { "CITESENTRY_MAILTO": "you@example.com" }
125
+ }
126
+ }
127
+ }
128
+ ```
129
+
130
+ MCP tools exposed:
131
+ - `verify_reference(reference, check_url, check_relevance)` — single reference
132
+ - `verify_reference_list(references, format, check_url, check_relevance)` — batch
133
+ - `check_url_alive(url)` — standalone URL check
134
+
135
+ ### Claude Code (CLI)
136
+
137
+ Register the server once:
138
+
139
+ ```bash
140
+ claude mcp add citesentry \
141
+ -e CITESENTRY_MAILTO=you@example.com \
142
+ -- uvx --from citesentry citesentry-mcp
143
+ ```
144
+
145
+ Then in any Claude Code session, ask naturally:
146
+
147
+ > "Use citesentry to verify this reference: Vaswani et al. (2017). Attention is all you need. NeurIPS."
148
+
149
+ > "Check whether all the references in refs.bib are real."
150
+
151
+ > "Is https://arxiv.org/abs/1706.03762 still live?"
152
+
153
+ ### Any MCP-compatible agent (Python example)
154
+
155
+ ```python
156
+ import asyncio
157
+ from mcp import ClientSession, StdioServerParameters
158
+ from mcp.client.stdio import stdio_client
159
+
160
+ server = StdioServerParameters(
161
+ command="uvx",
162
+ args=["--from", "citesentry", "citesentry-mcp"],
163
+ env={"CITESENTRY_MAILTO": "you@example.com"},
164
+ )
165
+
166
+ async def main():
167
+ async with stdio_client(server) as (read, write):
168
+ async with ClientSession(read, write) as session:
169
+ await session.initialize()
170
+
171
+ result = await session.call_tool(
172
+ "verify_reference",
173
+ {"reference": "Vaswani et al. (2017). Attention is all you need. NeurIPS."},
174
+ )
175
+ print(result.content[0].text)
176
+
177
+ asyncio.run(main())
178
+ ```
179
+
180
+ ## Environment variables
181
+
182
+ | Variable | Default | Description |
183
+ |---|---|---|
184
+ | `CITESENTRY_MAILTO` | `citesentry@example.com` | Contact email sent with OpenAlex/Crossref API requests (identifies you for their "polite pool") |
185
+ | `DEEPSEEK_API_KEY` | — | Required for relevance checks in CLI |
186
+ | `DEEPSEEK_BASE_URL` | `https://api.deepseek.com/v1` | OpenAI-compatible endpoint |
187
+ | `DEEPSEEK_MODEL` | `deepseek-chat` | Model for relevance judgments |
188
+
189
+ ## Supported input formats
190
+
191
+ - BibTeX (`.bib`) — via bibtexparser
192
+ - RIS (`.ris`) — via rispy; covers Zotero, Mendeley, EndNote, Web of Science
193
+ - CSL JSON (`.json`) — Zotero exports
194
+ - PubMed NBIB (`.nbib`)
195
+ - DOI list (`.txt` with one DOI per line)
196
+ - Plaintext reference sections — IEEE, APA, Vancouver, MLA, Chicago; auto-detected
197
+ - PDF (`.pdf`) — extracts reference section text via pdfminer.six
198
+
199
+ ## Caching
200
+
201
+ Results are cached in a SQLite database (`~/.cache/citesentry/cache.db`). Pass `--no-cache` to bypass.
@@ -0,0 +1,173 @@
1
+ # CiteSentry
2
+
3
+ [![PyPI](https://img.shields.io/pypi/v/citesentry)](https://pypi.org/project/citesentry/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/citesentry)](https://pypi.org/project/citesentry/)
5
+ [![CI](https://github.com/mkassaf/CiteSentry/actions/workflows/publish.yml/badge.svg)](https://github.com/mkassaf/CiteSentry/actions/workflows/publish.yml)
6
+
7
+ Citation verification tool: check whether references actually exist, whether their URLs are live, and whether the content is relevant to the citation.
8
+
9
+ ## What it does
10
+
11
+ Three checks per reference:
12
+
13
+ 1. **Existence** — resolves against OpenAlex, Crossref, Semantic Scholar, arXiv, and domain-specific databases (PubMed for biomedical, DBLP for CS)
14
+ 2. **URL liveness** — HTTP HEAD/GET check; classifies 2xx/4xx/timeout/bot-protection
15
+ 3. **Content relevance** — LLM-backed check comparing fetched content to the cited title/topic (requires `DEEPSEEK_API_KEY` for CLI use)
16
+
17
+ Verdicts: `VERIFIED`, `METADATA_MISMATCH`, `DEAD_URL`, `CONTENT_DRIFT`, `NOT_FOUND`, `UNRESOLVABLE`.
18
+
19
+ `NOT_FOUND` means "could not verify — likely fabricated, needs manual review." Never "fake."
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ pip install citesentry # basic install
25
+ pip install "citesentry[cli-llm]" # + DeepSeek for relevance checks
26
+ ```
27
+
28
+ For development:
29
+
30
+ ```bash
31
+ git clone https://github.com/mkassaf/CiteSentry
32
+ cd CiteSentry
33
+ pip install -e ".[dev]"
34
+ ```
35
+
36
+ ## CLI usage
37
+
38
+ ```bash
39
+ # Check a BibTeX file
40
+ citesentry check refs.bib
41
+
42
+ # Check a RIS/CSL-JSON/NBIB/plaintext file
43
+ citesentry check refs.ris
44
+ citesentry check refs.json
45
+
46
+ # Read from stdin
47
+ cat refs.txt | citesentry check -
48
+
49
+ # Single ad-hoc reference
50
+ citesentry check-one "Vaswani et al. (2017). Attention is all you need. NeurIPS."
51
+
52
+ # Output formats: table (default), json, md
53
+ citesentry check refs.bib --format json
54
+ citesentry check refs.bib --format md > report.md
55
+
56
+ # Skip checks
57
+ citesentry check refs.bib --no-llm # skip relevance (no API key needed)
58
+ citesentry check refs.bib --no-url # skip URL liveness
59
+
60
+ # Domain adapters (auto by default)
61
+ citesentry check refs.bib --domain pubmed # force PubMed only
62
+ citesentry check refs.bib --domain none # disable domain adapters
63
+
64
+ # Override plaintext style detection
65
+ citesentry check refs.txt --style ieee
66
+ ```
67
+
68
+ Exit code is non-zero if any reference is `NOT_FOUND` or `DEAD_URL` (useful in CI).
69
+
70
+ ## MCP server (Claude Desktop / Claude Code)
71
+
72
+ Add to your `claude_desktop_config.json`:
73
+
74
+ ```json
75
+ {
76
+ "mcpServers": {
77
+ "citesentry": {
78
+ "command": "citesentry-mcp",
79
+ "env": {
80
+ "CITESENTRY_MAILTO": "you@example.com",
81
+ "DEEPSEEK_API_KEY": "sk-..."
82
+ }
83
+ }
84
+ }
85
+ }
86
+ ```
87
+
88
+ Or with `uvx` (no prior install needed):
89
+
90
+ ```json
91
+ {
92
+ "mcpServers": {
93
+ "citesentry": {
94
+ "command": "uvx",
95
+ "args": ["--from", "citesentry", "citesentry-mcp"],
96
+ "env": { "CITESENTRY_MAILTO": "you@example.com" }
97
+ }
98
+ }
99
+ }
100
+ ```
101
+
102
+ MCP tools exposed:
103
+ - `verify_reference(reference, check_url, check_relevance)` — single reference
104
+ - `verify_reference_list(references, format, check_url, check_relevance)` — batch
105
+ - `check_url_alive(url)` — standalone URL check
106
+
107
+ ### Claude Code (CLI)
108
+
109
+ Register the server once:
110
+
111
+ ```bash
112
+ claude mcp add citesentry \
113
+ -e CITESENTRY_MAILTO=you@example.com \
114
+ -- uvx --from citesentry citesentry-mcp
115
+ ```
116
+
117
+ Then in any Claude Code session, ask naturally:
118
+
119
+ > "Use citesentry to verify this reference: Vaswani et al. (2017). Attention is all you need. NeurIPS."
120
+
121
+ > "Check whether all the references in refs.bib are real."
122
+
123
+ > "Is https://arxiv.org/abs/1706.03762 still live?"
124
+
125
+ ### Any MCP-compatible agent (Python example)
126
+
127
+ ```python
128
+ import asyncio
129
+ from mcp import ClientSession, StdioServerParameters
130
+ from mcp.client.stdio import stdio_client
131
+
132
+ server = StdioServerParameters(
133
+ command="uvx",
134
+ args=["--from", "citesentry", "citesentry-mcp"],
135
+ env={"CITESENTRY_MAILTO": "you@example.com"},
136
+ )
137
+
138
+ async def main():
139
+ async with stdio_client(server) as (read, write):
140
+ async with ClientSession(read, write) as session:
141
+ await session.initialize()
142
+
143
+ result = await session.call_tool(
144
+ "verify_reference",
145
+ {"reference": "Vaswani et al. (2017). Attention is all you need. NeurIPS."},
146
+ )
147
+ print(result.content[0].text)
148
+
149
+ asyncio.run(main())
150
+ ```
151
+
152
+ ## Environment variables
153
+
154
+ | Variable | Default | Description |
155
+ |---|---|---|
156
+ | `CITESENTRY_MAILTO` | `citesentry@example.com` | Contact email sent with OpenAlex/Crossref API requests (identifies you for their "polite pool") |
157
+ | `DEEPSEEK_API_KEY` | — | Required for relevance checks in CLI |
158
+ | `DEEPSEEK_BASE_URL` | `https://api.deepseek.com/v1` | OpenAI-compatible endpoint |
159
+ | `DEEPSEEK_MODEL` | `deepseek-chat` | Model for relevance judgments |
160
+
161
+ ## Supported input formats
162
+
163
+ - BibTeX (`.bib`) — via bibtexparser
164
+ - RIS (`.ris`) — via rispy; covers Zotero, Mendeley, EndNote, Web of Science
165
+ - CSL JSON (`.json`) — Zotero exports
166
+ - PubMed NBIB (`.nbib`)
167
+ - DOI list (`.txt` with one DOI per line)
168
+ - Plaintext reference sections — IEEE, APA, Vancouver, MLA, Chicago; auto-detected
169
+ - PDF (`.pdf`) — extracts reference section text via pdfminer.six
170
+
171
+ ## Caching
172
+
173
+ Results are cached in a SQLite database (`~/.cache/citesentry/cache.db`). Pass `--no-cache` to bypass.
@@ -0,0 +1,3 @@
1
+ """citesentry — citation verification tool."""
2
+
3
+ __version__ = "0.1.1"
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import json
5
+ import sqlite3
6
+ import time
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+
11
+ class Cache:
12
+ def __init__(self, path: Path) -> None:
13
+ self._path = path
14
+ self._path.parent.mkdir(parents=True, exist_ok=True)
15
+ self._conn = sqlite3.connect(str(path), check_same_thread=False)
16
+ self._init()
17
+
18
+ def _init(self) -> None:
19
+ self._conn.execute(
20
+ """CREATE TABLE IF NOT EXISTS cache (
21
+ key TEXT PRIMARY KEY,
22
+ value TEXT NOT NULL,
23
+ created_at REAL NOT NULL
24
+ )"""
25
+ )
26
+ self._conn.commit()
27
+
28
+ @staticmethod
29
+ def _key(namespace: str, identifier: str) -> str:
30
+ h = hashlib.sha256(f"{namespace}:{identifier}".encode()).hexdigest()
31
+ return h
32
+
33
+ def get(self, namespace: str, identifier: str) -> Any | None:
34
+ key = self._key(namespace, identifier)
35
+ row = self._conn.execute(
36
+ "SELECT value FROM cache WHERE key = ?", (key,)
37
+ ).fetchone()
38
+ if row is None:
39
+ return None
40
+ return json.loads(row[0])
41
+
42
+ def set(self, namespace: str, identifier: str, value: Any) -> None:
43
+ key = self._key(namespace, identifier)
44
+ self._conn.execute(
45
+ "INSERT OR REPLACE INTO cache (key, value, created_at) VALUES (?, ?, ?)",
46
+ (key, json.dumps(value), time.time()),
47
+ )
48
+ self._conn.commit()
49
+
50
+ def close(self) -> None:
51
+ self._conn.close()
52
+
53
+
54
# Process-wide cache instance; created by the first get_cache() call.
_cache: Cache | None = None


def get_cache(path: Path | None = None) -> Cache:
    """Return the shared :class:`Cache`, creating it on first use.

    Args:
        path: Database location used when the shared instance is created.
            When omitted, the ``cache_path`` from ``get_settings()`` is used.
            Once an instance exists it is returned as-is and *path* is
            ignored.

    Returns:
        The shared ``Cache`` instance.
    """
    global _cache
    if _cache is None:
        if path is None:
            # Imported lazily, and only when the configured default location
            # is actually needed.
            from citesentry.config import get_settings

            path = get_settings().cache_path
        _cache = Cache(path)
    return _cache
64
+
65
+
66
def reset_cache() -> None:
    """Close the shared cache, if one exists, and clear the module reference."""
    global _cache
    current = _cache
    if current is None:
        # Nothing has been created yet; leave the global untouched.
        return
    current.close()
    _cache = None
File without changes