nl-processing 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {nl_processing-0.3.0 → nl_processing-0.5.0}/PKG-INFO +2 -1
  2. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/_neon_exercise.py +27 -0
  3. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/_queries.py +13 -0
  4. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/abstract.py +25 -0
  5. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/neon.py +33 -41
  6. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/exercise_progress.py +12 -16
  7. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/service.py +13 -0
  8. nl_processing-0.5.0/nl_processing/database_cache/_local_store_queries.py +58 -0
  9. nl_processing-0.5.0/nl_processing/database_cache/exceptions.py +10 -0
  10. nl_processing-0.5.0/nl_processing/database_cache/local_store.py +200 -0
  11. nl_processing-0.5.0/nl_processing/database_cache/logging.py +5 -0
  12. nl_processing-0.5.0/nl_processing/database_cache/models.py +12 -0
  13. nl_processing-0.5.0/nl_processing/database_cache/service.py +185 -0
  14. nl_processing-0.5.0/nl_processing/database_cache/sync.py +82 -0
  15. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py +9 -2
  16. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/prompts/nl.json +62 -0
  17. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/sampling/service.py +18 -6
  18. nl_processing-0.5.0/nl_processing/translate_word/__init__.py +0 -0
  19. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/PKG-INFO +2 -1
  20. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/SOURCES.txt +8 -1
  21. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/requires.txt +1 -0
  22. {nl_processing-0.3.0 → nl_processing-0.5.0}/pyproject.toml +2 -1
  23. nl_processing-0.3.0/nl_processing/database/cached_service.py +0 -82
  24. {nl_processing-0.3.0 → nl_processing-0.5.0}/README.md +0 -0
  25. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/__init__.py +0 -0
  26. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/__init__.py +0 -0
  27. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/exceptions.py +0 -0
  28. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/models.py +0 -0
  29. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/prompts.py +0 -0
  30. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/scripts/prompt_author.py +0 -0
  31. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/__init__.py +0 -0
  32. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/__init__.py +0 -0
  33. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/exceptions.py +0 -0
  34. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/logging.py +0 -0
  35. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/models.py +0 -0
  36. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/testing.py +0 -0
  37. {nl_processing-0.3.0/nl_processing/extract_words_from_text → nl_processing-0.5.0/nl_processing/database_cache}/__init__.py +0 -0
  38. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/__init__.py +0 -0
  39. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/benchmark.py +0 -0
  40. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/image_encoding.py +0 -0
  41. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/service.py +0 -0
  42. {nl_processing-0.3.0/nl_processing/sampling → nl_processing-0.5.0/nl_processing/extract_words_from_text}/__init__.py +0 -0
  43. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py +0 -0
  44. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_words_from_text/prompts/nl.json +0 -0
  45. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_words_from_text/service.py +0 -0
  46. {nl_processing-0.3.0/nl_processing/translate_text → nl_processing-0.5.0/nl_processing/sampling}/__init__.py +0 -0
  47. {nl_processing-0.3.0/nl_processing/translate_word → nl_processing-0.5.0/nl_processing/translate_text}/__init__.py +0 -0
  48. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py +0 -0
  49. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_text/prompts/nl_ru.json +0 -0
  50. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_text/service.py +0 -0
  51. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py +0 -0
  52. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_word/prompts/nl_ru.json +0 -0
  53. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_word/service.py +0 -0
  54. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/dependency_links.txt +0 -0
  55. {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/top_level.txt +0 -0
  56. {nl_processing-0.3.0 → nl_processing-0.5.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nl_processing
3
- Version: 0.3.0
3
+ Version: 0.5.0
4
4
  Summary: Natural language processing playground
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: langchain<1,>=0.3
9
9
  Requires-Dist: langchain-openai<1,>=0.3
10
10
  Requires-Dist: opencv-python<5,>=4.10
11
11
  Requires-Dist: asyncpg<1,>=0.30
12
+ Requires-Dist: aiosqlite<1,>=0.20
12
13
 
13
14
  # nl_processing
14
15
 
@@ -90,3 +90,30 @@ async def mark_event(
90
90
  await conn.execute(mark_event_applied_query(table), event_id)
91
91
  except asyncpg.PostgresError as exc:
92
92
  raise DatabaseError(str(exc)) from exc
93
+
94
+
95
+ async def atomic_apply_delta(
96
+ conn: asyncpg.Connection, # type: ignore[type-arg]
97
+ score_table: str,
98
+ events_table: str,
99
+ user_id: str,
100
+ event_id: str,
101
+ source_word_id: int,
102
+ delta: int,
103
+ ) -> bool:
104
+ """Atomically check-apply-mark a score delta in one transaction."""
105
+ try:
106
+ async with conn.transaction():
107
+ already = await conn.fetchrow(check_event_applied_query(events_table), event_id)
108
+ if already is not None:
109
+ return False
110
+ await conn.fetchrow(
111
+ increment_score_query(score_table),
112
+ user_id,
113
+ source_word_id,
114
+ delta,
115
+ )
116
+ await conn.execute(mark_event_applied_query(events_table), event_id)
117
+ return True
118
+ except asyncpg.PostgresError as exc:
119
+ raise DatabaseError(str(exc)) from exc
@@ -133,6 +133,19 @@ def get_user_words_query(
133
133
  return query
134
134
 
135
135
 
136
+ def count_user_words_query(language: str, word_type: str | None) -> str:
137
+ # Table name from Language enum value, not user input # noqa: S608
138
+ query = f"""
139
+ SELECT COUNT(*) AS cnt
140
+ FROM user_words uw
141
+ JOIN words_{language} w ON uw.word_id = w.id
142
+ WHERE uw.user_id = $1 AND uw.language = $2
143
+ """ # noqa: S608
144
+ if word_type is not None:
145
+ query += " AND w.word_type = $3"
146
+ return query
147
+
148
+
136
149
  def increment_score_query(table: str) -> str:
137
150
  # Table name from Language enum values, not user input # noqa: S608
138
151
  return f"""
@@ -53,6 +53,15 @@ class AbstractBackend(ABC):
53
53
  and random ordering.
54
54
  """
55
55
 
56
+ @abstractmethod
57
+ async def count_user_words(
58
+ self,
59
+ user_id: str,
60
+ language: str,
61
+ word_type: str | None = None,
62
+ ) -> int:
63
+ """Return total user-word associations for the given user and language."""
64
+
56
65
  @abstractmethod
57
66
  async def add_user_word(
58
67
  self,
@@ -103,6 +112,22 @@ class AbstractBackend(ABC):
103
112
  ) -> None:
104
113
  """Insert event_id into the applied_events table."""
105
114
 
115
+ @abstractmethod
116
+ async def apply_score_delta_atomic(
117
+ self,
118
+ score_table: str,
119
+ events_table: str,
120
+ user_id: str,
121
+ event_id: str,
122
+ source_word_id: int,
123
+ delta: int,
124
+ ) -> bool:
125
+ """Atomically check-apply-mark a score delta in one transaction.
126
+
127
+ Returns True if the delta was applied, False if event_id was already applied.
128
+ The entire operation (check + increment + mark) runs in a single transaction.
129
+ """
130
+
106
131
  @abstractmethod
107
132
  async def create_tables(
108
133
  self,
@@ -1,8 +1,9 @@
1
- """NeonBackend asyncpg implementation of AbstractBackend for Neon PostgreSQL."""
1
+ """NeonBackend asyncpg implementation for Neon PostgreSQL."""
2
2
 
3
3
  import asyncpg
4
4
 
5
5
  from nl_processing.database.backend._neon_exercise import (
6
+ atomic_apply_delta,
6
7
  check_event,
7
8
  create_exercise_tables,
8
9
  get_scores,
@@ -14,6 +15,7 @@ from nl_processing.database.backend._queries import (
14
15
  CREATE_USER_WORDS,
15
16
  add_translation_link_query,
16
17
  add_word_query,
18
+ count_user_words_query,
17
19
  create_translations_table,
18
20
  create_words_table,
19
21
  get_user_words_query,
@@ -34,7 +36,6 @@ class NeonBackend(AbstractBackend):
34
36
  self._connection: asyncpg.Connection | None = None # type: ignore[type-arg]
35
37
 
36
38
  async def _connect(self) -> asyncpg.Connection: # type: ignore[type-arg]
37
- """Return cached connection, creating it lazily on first call."""
38
39
  if self._connection is None:
39
40
  try:
40
41
  self._connection = await asyncpg.connect(dsn=self._database_url)
@@ -43,13 +44,12 @@ class NeonBackend(AbstractBackend):
43
44
  raise DatabaseError(str(exc)) from exc
44
45
  except OSError as exc:
45
46
  raise DatabaseError(str(exc)) from exc
47
+ if self._connection is None:
48
+ raise DatabaseError("Database connection was not initialized")
46
49
  return self._connection
47
50
 
48
51
  async def create_tables(
49
- self,
50
- languages: list[str],
51
- pairs: list[tuple[str, str]],
52
- exercise_slugs: list[str],
52
+ self, languages: list[str], pairs: list[tuple[str, str]], exercise_slugs: list[str]
53
53
  ) -> None:
54
54
  conn = await self._connect()
55
55
  try:
@@ -78,11 +78,7 @@ class NeonBackend(AbstractBackend):
78
78
  return None
79
79
  return int(row["id"])
80
80
 
81
- async def get_word(
82
- self,
83
- table: str,
84
- normalized_form: str,
85
- ) -> dict[str, str | int] | None:
81
+ async def get_word(self, table: str, normalized_form: str) -> dict[str, str | int] | None:
86
82
  conn = await self._connect()
87
83
  try:
88
84
  row = await conn.fetchrow(get_word_query(table), normalized_form)
@@ -96,24 +92,14 @@ class NeonBackend(AbstractBackend):
96
92
  "word_type": row["word_type"],
97
93
  }
98
94
 
99
- async def add_translation_link(
100
- self,
101
- table: str,
102
- source_id: int,
103
- target_id: int,
104
- ) -> None:
95
+ async def add_translation_link(self, table: str, source_id: int, target_id: int) -> None:
105
96
  conn = await self._connect()
106
97
  try:
107
98
  await conn.execute(add_translation_link_query(table), source_id, target_id)
108
99
  except asyncpg.PostgresError as exc:
109
100
  raise DatabaseError(str(exc)) from exc
110
101
 
111
- async def add_user_word(
112
- self,
113
- user_id: str,
114
- word_id: int,
115
- language: str,
116
- ) -> None:
102
+ async def add_user_word(self, user_id: str, word_id: int, language: str) -> None:
117
103
  conn = await self._connect()
118
104
  try:
119
105
  await conn.execute(ADD_USER_WORD, user_id, word_id, language)
@@ -144,6 +130,19 @@ class NeonBackend(AbstractBackend):
144
130
  raise DatabaseError(str(exc)) from exc
145
131
  return [dict(row) for row in rows]
146
132
 
133
+ async def count_user_words(self, user_id: str, language: str, word_type: str | None = None) -> int:
134
+ conn = await self._connect()
135
+ args: list[str] = [user_id, language]
136
+ if word_type is not None:
137
+ args.append(word_type)
138
+ try:
139
+ count = await conn.fetchval(count_user_words_query(language, word_type), *args)
140
+ except asyncpg.PostgresError as exc:
141
+ raise DatabaseError(str(exc)) from exc
142
+ if count is None:
143
+ return 0
144
+ return int(count)
145
+
147
146
  async def increment_user_exercise_score(
148
147
  self,
149
148
  table: str,
@@ -155,36 +154,29 @@ class NeonBackend(AbstractBackend):
155
154
  return await increment_score(conn, table, user_id, source_word_id, delta)
156
155
 
157
156
  async def get_user_exercise_scores(
158
- self,
159
- table: str,
160
- user_id: str,
161
- source_word_ids: list[int],
157
+ self, table: str, user_id: str, source_word_ids: list[int]
162
158
  ) -> list[dict[str, str | int]]:
163
159
  conn = await self._connect()
164
160
  return await get_scores(conn, table, user_id, source_word_ids)
165
161
 
166
- async def check_event_applied(
167
- self,
168
- table: str,
169
- event_id: str,
170
- ) -> bool:
162
+ async def check_event_applied(self, table: str, event_id: str) -> bool:
171
163
  conn = await self._connect()
172
164
  return await check_event(conn, table, event_id)
173
165
 
174
- async def mark_event_applied(
175
- self,
176
- table: str,
177
- event_id: str,
178
- ) -> None:
166
+ async def mark_event_applied(self, table: str, event_id: str) -> None:
179
167
  conn = await self._connect()
180
168
  await mark_event(conn, table, event_id)
181
169
 
170
+ async def apply_score_delta_atomic(
171
+ self, score_table: str, events_table: str,
172
+ user_id: str, event_id: str, source_word_id: int, delta: int,
173
+ ) -> bool: # fmt: skip
174
+ conn = await self._connect()
175
+ return await atomic_apply_delta(conn, score_table, events_table, user_id, event_id, source_word_id, delta)
182
176
 
183
- def _infer_target_language(source_language: str) -> str:
184
- """Infer the target language for translation lookups.
185
177
 
186
- With only two languages (nl, ru), the target is always the other one.
187
- """
178
+ def _infer_target_language(source_language: str) -> str:
179
+ """Infer the other language in the nl/ru pair."""
188
180
  if source_language == "nl":
189
181
  return "ru"
190
182
  return "nl"
@@ -119,25 +119,21 @@ class ExerciseProgressStore:
119
119
  ) -> None:
120
120
  """Apply a score delta idempotently, guarded by event deduplication.
121
121
 
122
- Validates exercise_type. Skips if event_id was already applied.
122
+ Validates exercise_type and delta. Skips if event_id was already applied.
123
+ The check-increment-mark operation is atomic (single transaction).
123
124
  """
124
125
  self._validate_exercise_type(exercise_type)
125
- already_applied = await self._backend.check_event_applied(
126
- self._applied_events_table,
127
- event_id,
128
- )
129
- if already_applied:
130
- return
126
+ if delta not in (1, -1):
127
+ msg = f"delta must be +1 or -1, got {delta}"
128
+ raise ValueError(msg)
131
129
  table = self._score_tables[exercise_type]
132
- await self._backend.increment_user_exercise_score(
133
- table,
134
- self._user_id,
135
- source_word_id,
136
- delta,
137
- )
138
- await self._backend.mark_event_applied(
139
- self._applied_events_table,
140
- event_id,
130
+ await self._backend.apply_score_delta_atomic(
131
+ score_table=table,
132
+ events_table=self._applied_events_table,
133
+ user_id=self._user_id,
134
+ event_id=event_id,
135
+ source_word_id=source_word_id,
136
+ delta=delta,
141
137
  )
142
138
 
143
139
  def _validate_exercise_type(self, exercise_type: str) -> None:
@@ -140,6 +140,19 @@ class DatabaseService:
140
140
  language=self._target_language,
141
141
  )
142
142
  pairs.append(WordPair(source=source, target=target))
143
+ if limit is None and not random:
144
+ total_count = await self._backend.count_user_words(
145
+ self._user_id,
146
+ self._source_language.value,
147
+ word_type=word_type.value if word_type else None,
148
+ )
149
+ if total_count > len(pairs):
150
+ excluded_count = total_count - len(pairs)
151
+ _logger.warning(
152
+ "%d of %d words excluded from get_words() due to missing translations",
153
+ excluded_count,
154
+ total_count,
155
+ )
143
156
  return pairs
144
157
 
145
158
  @classmethod
@@ -0,0 +1,58 @@
1
+ """DDL and query constants for the local SQLite cache store."""
2
+
3
+ DDL_CACHED_WORD_PAIRS = """
4
+ CREATE TABLE IF NOT EXISTS cached_word_pairs (
5
+ source_word_id INTEGER PRIMARY KEY,
6
+ source_normalized_form TEXT NOT NULL,
7
+ source_word_type TEXT NOT NULL,
8
+ target_word_id INTEGER NOT NULL,
9
+ target_normalized_form TEXT NOT NULL,
10
+ target_word_type TEXT NOT NULL
11
+ )"""
12
+
13
+ DDL_CACHED_SCORES = """
14
+ CREATE TABLE IF NOT EXISTS cached_scores (
15
+ source_word_id INTEGER NOT NULL,
16
+ exercise_type TEXT NOT NULL,
17
+ score INTEGER NOT NULL DEFAULT 0,
18
+ updated_at TEXT NOT NULL,
19
+ PRIMARY KEY (source_word_id, exercise_type)
20
+ )"""
21
+
22
+ DDL_PENDING_SCORE_EVENTS = """
23
+ CREATE TABLE IF NOT EXISTS pending_score_events (
24
+ event_id TEXT PRIMARY KEY,
25
+ source_word_id INTEGER NOT NULL,
26
+ exercise_type TEXT NOT NULL,
27
+ delta INTEGER NOT NULL,
28
+ created_at TEXT NOT NULL,
29
+ flushed_at TEXT,
30
+ last_error TEXT
31
+ )"""
32
+
33
+ DDL_CACHE_METADATA = """
34
+ CREATE TABLE IF NOT EXISTS cache_metadata (
35
+ id INTEGER PRIMARY KEY DEFAULT 1,
36
+ exercise_types TEXT NOT NULL,
37
+ schema_version INTEGER NOT NULL DEFAULT 1,
38
+ last_refresh_started_at TEXT,
39
+ last_refresh_completed_at TEXT,
40
+ last_flush_completed_at TEXT,
41
+ last_error TEXT
42
+ )"""
43
+
44
+ ALL_DDL = [DDL_CACHED_WORD_PAIRS, DDL_CACHED_SCORES, DDL_PENDING_SCORE_EVENTS, DDL_CACHE_METADATA]
45
+
46
+ UPSERT_SCORE = (
47
+ "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
48
+ " ON CONFLICT(source_word_id, exercise_type) DO UPDATE SET score = score + ?, updated_at = ?"
49
+ )
50
+
51
+ INSERT_PENDING_EVENT = (
52
+ "INSERT INTO pending_score_events (event_id, source_word_id, exercise_type, delta, created_at)"
53
+ " VALUES (?, ?, ?, ?, ?)"
54
+ )
55
+
56
+ INSERT_WORD_PAIR = "INSERT INTO cached_word_pairs VALUES (?, ?, ?, ?, ?, ?)"
57
+
58
+ INSERT_SCORE = "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
@@ -0,0 +1,10 @@
1
+ class CacheNotReadyError(Exception):
2
+ """Raised when cached data is requested before the first usable snapshot exists."""
3
+
4
+
5
+ class CacheStorageError(Exception):
6
+ """Raised when the local SQLite cache file cannot be opened, read, or updated."""
7
+
8
+
9
+ class CacheSyncError(Exception):
10
+ """Raised when an explicit refresh or flush operation fails synchronously."""
@@ -0,0 +1,200 @@
1
+ """SQLite data-access layer for the local word-pair / score cache."""
2
+
3
+ from datetime import UTC, datetime
4
+ import json
5
+ import sqlite3
6
+
7
+ import aiosqlite
8
+
9
+ from nl_processing.database_cache._local_store_queries import (
10
+ ALL_DDL,
11
+ INSERT_PENDING_EVENT,
12
+ INSERT_SCORE,
13
+ INSERT_WORD_PAIR,
14
+ UPSERT_SCORE,
15
+ )
16
+ from nl_processing.database_cache.exceptions import CacheStorageError
17
+
18
+
19
+ def _now() -> str:
20
+ return datetime.now(tz=UTC).isoformat()
21
+
22
+
23
+ class LocalStore:
24
+ """Async SQLite store for cached word pairs, scores, and pending events."""
25
+
26
+ def __init__(self, db_path: str) -> None:
27
+ self._db_path = db_path
28
+ self._db: aiosqlite.Connection | None = None
29
+
30
+ @property
31
+ def _conn(self) -> aiosqlite.Connection:
32
+ if self._db is None:
33
+ raise CacheStorageError("LocalStore is not open")
34
+ return self._db
35
+
36
+ async def open(self) -> None:
37
+ """Open the SQLite connection and create tables."""
38
+ try:
39
+ self._db = await aiosqlite.connect(self._db_path)
40
+ self._db.row_factory = aiosqlite.Row
41
+ await self._db.execute("PRAGMA journal_mode=WAL")
42
+ for ddl in ALL_DDL:
43
+ await self._db.execute(ddl)
44
+ await self._db.commit()
45
+ except sqlite3.Error as exc:
46
+ raise CacheStorageError(str(exc)) from exc
47
+
48
+ async def close(self) -> None:
49
+ """Close the SQLite connection."""
50
+ if self._db:
51
+ await self._db.close()
52
+ self._db = None
53
+
54
+ async def get_cached_word_pairs(
55
+ self,
56
+ word_type: str | None = None,
57
+ limit: int | None = None,
58
+ *,
59
+ random: bool = False,
60
+ ) -> list[dict[str, str | int]]:
61
+ """Query cached word pairs with optional filter, limit, and random ordering."""
62
+ sql = "SELECT * FROM cached_word_pairs"
63
+ params: list[str | int] = []
64
+ if word_type is not None:
65
+ sql += " WHERE source_word_type = ?"
66
+ params.append(word_type)
67
+ if random:
68
+ sql += " ORDER BY RANDOM()"
69
+ if limit is not None:
70
+ sql += " LIMIT ?"
71
+ params.append(limit)
72
+ return await self._fetch_all(sql, params)
73
+
74
+ async def get_cached_word_pairs_with_scores(self, exercise_types: list[str]) -> list[dict[str, str | int]]:
75
+ """Query word pairs and attach scores per exercise type (missing = 0)."""
76
+ try:
77
+ rows = await self._fetch_all("SELECT * FROM cached_word_pairs")
78
+ for row in rows:
79
+ for et in exercise_types:
80
+ sc = await self._conn.execute(
81
+ "SELECT score FROM cached_scores WHERE source_word_id=? AND exercise_type=?",
82
+ (row["source_word_id"], et),
83
+ )
84
+ score_row = await sc.fetchone()
85
+ row[f"score_{et}"] = int(score_row["score"]) if score_row else 0
86
+ return rows
87
+ except sqlite3.Error as exc:
88
+ raise CacheStorageError(str(exc)) from exc
89
+
90
+ async def get_pending_events(self) -> list[dict[str, str | int]]:
91
+ return await self._fetch_all("SELECT * FROM pending_score_events WHERE flushed_at IS NULL ORDER BY created_at")
92
+
93
+ async def get_pending_event_count(self) -> int:
94
+ try:
95
+ cur = await self._conn.execute("SELECT COUNT(*) FROM pending_score_events WHERE flushed_at IS NULL")
96
+ row = await cur.fetchone()
97
+ return int(row[0]) if row else 0
98
+ except sqlite3.Error as exc:
99
+ raise CacheStorageError(str(exc)) from exc
100
+
101
+ async def get_metadata(self) -> dict[str, str | int] | None:
102
+ try:
103
+ cur = await self._conn.execute("SELECT * FROM cache_metadata WHERE id = 1")
104
+ row = await cur.fetchone()
105
+ return dict(row) if row else None
106
+ except sqlite3.Error as exc:
107
+ raise CacheStorageError(str(exc)) from exc
108
+
109
+ async def has_snapshot(self) -> bool:
110
+ try:
111
+ cur = await self._conn.execute("SELECT 1 FROM cached_word_pairs LIMIT 1")
112
+ return (await cur.fetchone()) is not None
113
+ except sqlite3.Error as exc:
114
+ raise CacheStorageError(str(exc)) from exc
115
+
116
+ async def record_score_and_event(
117
+ self,
118
+ source_word_id: int,
119
+ exercise_type: str,
120
+ delta: int,
121
+ event_id: str,
122
+ ) -> None:
123
+ """Atomically upsert a cached score and insert a pending event."""
124
+ now = _now()
125
+ try:
126
+ await self._conn.execute(UPSERT_SCORE, (source_word_id, exercise_type, delta, now, delta, now))
127
+ await self._conn.execute(INSERT_PENDING_EVENT, (event_id, source_word_id, exercise_type, delta, now))
128
+ await self._conn.commit()
129
+ except sqlite3.Error as exc:
130
+ raise CacheStorageError(str(exc)) from exc
131
+
132
+ async def rebuild_snapshot(
133
+ self,
134
+ word_pairs: list[tuple[int, str, str, int, str, str]],
135
+ scores: dict[tuple[int, str], int],
136
+ ) -> None:
137
+ """Atomically replace cached word pairs and scores, then reapply pending events."""
138
+ now = _now()
139
+ try:
140
+ await self._conn.execute("DELETE FROM cached_word_pairs")
141
+ await self._conn.execute("DELETE FROM cached_scores")
142
+ for wp in word_pairs:
143
+ await self._conn.execute(INSERT_WORD_PAIR, wp)
144
+ for (wid, et), score in scores.items():
145
+ await self._conn.execute(INSERT_SCORE, (wid, et, score, now))
146
+ for evt in await self.get_pending_events():
147
+ await self._conn.execute(
148
+ UPSERT_SCORE,
149
+ (evt["source_word_id"], evt["exercise_type"], evt["delta"], now, evt["delta"], now),
150
+ )
151
+ await self._conn.commit()
152
+ except sqlite3.Error as exc:
153
+ raise CacheStorageError(str(exc)) from exc
154
+
155
+ async def mark_event_flushed(self, event_id: str) -> None:
156
+ await self._exec_commit("UPDATE pending_score_events SET flushed_at=? WHERE event_id=?", (_now(), event_id))
157
+
158
+ async def mark_event_failed(self, event_id: str, error: str) -> None:
159
+ await self._exec_commit("UPDATE pending_score_events SET last_error=? WHERE event_id=?", (error, event_id))
160
+
161
+ async def update_metadata(self, **fields: str | int | None) -> None:
162
+ if not fields:
163
+ return
164
+ set_clause = ", ".join(f"{k} = ?" for k in fields)
165
+ await self._exec_commit(
166
+ f"UPDATE cache_metadata SET {set_clause} WHERE id = 1", # noqa: S608
167
+ tuple(fields.values()),
168
+ )
169
+
170
+ async def ensure_metadata(self, exercise_types: list[str]) -> None:
171
+ await self._exec_commit(
172
+ "INSERT OR REPLACE INTO cache_metadata (id, exercise_types, schema_version) VALUES (1, ?, 1)",
173
+ (json.dumps(exercise_types),),
174
+ )
175
+
176
+ async def get_source_word_id(self, normalized_form: str, word_type: str) -> int | None:
177
+ """Look up a source_word_id from cached_word_pairs."""
178
+ try:
179
+ cur = await self._conn.execute(
180
+ "SELECT source_word_id FROM cached_word_pairs WHERE source_normalized_form=? AND source_word_type=?",
181
+ (normalized_form, word_type),
182
+ )
183
+ row = await cur.fetchone()
184
+ return int(row["source_word_id"]) if row else None
185
+ except sqlite3.Error as exc:
186
+ raise CacheStorageError(str(exc)) from exc
187
+
188
+ async def _fetch_all(self, sql: str, params: list[str | int] | None = None) -> list[dict[str, str | int]]:
189
+ try:
190
+ cur = await self._conn.execute(sql, params or [])
191
+ return [dict(row) for row in await cur.fetchall()]
192
+ except sqlite3.Error as exc:
193
+ raise CacheStorageError(str(exc)) from exc
194
+
195
+ async def _exec_commit(self, sql: str, params: tuple[str | int | None, ...]) -> None:
196
+ try:
197
+ await self._conn.execute(sql, params)
198
+ await self._conn.commit()
199
+ except sqlite3.Error as exc:
200
+ raise CacheStorageError(str(exc)) from exc
@@ -0,0 +1,5 @@
1
+ import logging
2
+
3
+
4
+ def get_logger(name: str) -> logging.Logger:
5
+ return logging.getLogger(f"nl_processing.database_cache.{name}")
@@ -0,0 +1,12 @@
1
+ from datetime import datetime
2
+
3
+ from pydantic import BaseModel
4
+
5
+
6
+ class CacheStatus(BaseModel):
7
+ is_ready: bool
8
+ is_stale: bool
9
+ has_snapshot: bool
10
+ pending_events: int
11
+ last_refresh_completed_at: datetime | None
12
+ last_flush_completed_at: datetime | None