nl-processing 0.3.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. {nl_processing-0.3.0 → nl_processing-0.4.0}/PKG-INFO +2 -1
  2. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_neon_exercise.py +27 -0
  3. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_queries.py +13 -0
  4. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/abstract.py +25 -0
  5. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/neon.py +33 -41
  6. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exercise_progress.py +12 -16
  7. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/service.py +13 -0
  8. nl_processing-0.4.0/nl_processing/database_cache/_local_store_queries.py +58 -0
  9. nl_processing-0.4.0/nl_processing/database_cache/exceptions.py +10 -0
  10. nl_processing-0.4.0/nl_processing/database_cache/local_store.py +200 -0
  11. nl_processing-0.4.0/nl_processing/database_cache/logging.py +5 -0
  12. nl_processing-0.4.0/nl_processing/database_cache/models.py +12 -0
  13. nl_processing-0.4.0/nl_processing/database_cache/service.py +185 -0
  14. nl_processing-0.4.0/nl_processing/database_cache/sync.py +82 -0
  15. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py +9 -2
  16. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/nl.json +62 -0
  17. nl_processing-0.4.0/nl_processing/translate_word/__init__.py +0 -0
  18. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/PKG-INFO +2 -1
  19. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/SOURCES.txt +8 -1
  20. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/requires.txt +1 -0
  21. {nl_processing-0.3.0 → nl_processing-0.4.0}/pyproject.toml +2 -1
  22. nl_processing-0.3.0/nl_processing/database/cached_service.py +0 -82
  23. {nl_processing-0.3.0 → nl_processing-0.4.0}/README.md +0 -0
  24. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/__init__.py +0 -0
  25. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/__init__.py +0 -0
  26. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/exceptions.py +0 -0
  27. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/models.py +0 -0
  28. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/prompts.py +0 -0
  29. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/scripts/prompt_author.py +0 -0
  30. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/__init__.py +0 -0
  31. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/__init__.py +0 -0
  32. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exceptions.py +0 -0
  33. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/logging.py +0 -0
  34. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/models.py +0 -0
  35. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/testing.py +0 -0
  36. {nl_processing-0.3.0/nl_processing/extract_words_from_text → nl_processing-0.4.0/nl_processing/database_cache}/__init__.py +0 -0
  37. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/__init__.py +0 -0
  38. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/benchmark.py +0 -0
  39. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/image_encoding.py +0 -0
  40. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/service.py +0 -0
  41. {nl_processing-0.3.0/nl_processing/sampling → nl_processing-0.4.0/nl_processing/extract_words_from_text}/__init__.py +0 -0
  42. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py +0 -0
  43. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/prompts/nl.json +0 -0
  44. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/service.py +0 -0
  45. {nl_processing-0.3.0/nl_processing/translate_text → nl_processing-0.4.0/nl_processing/sampling}/__init__.py +0 -0
  46. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/sampling/service.py +0 -0
  47. {nl_processing-0.3.0/nl_processing/translate_word → nl_processing-0.4.0/nl_processing/translate_text}/__init__.py +0 -0
  48. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py +0 -0
  49. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/prompts/nl_ru.json +0 -0
  50. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/service.py +0 -0
  51. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py +0 -0
  52. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/prompts/nl_ru.json +0 -0
  53. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/service.py +0 -0
  54. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/dependency_links.txt +0 -0
  55. {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/top_level.txt +0 -0
  56. {nl_processing-0.3.0 → nl_processing-0.4.0}/setup.cfg +0 -0
{nl_processing-0.3.0 → nl_processing-0.4.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nl_processing
- Version: 0.3.0
+ Version: 0.4.0
  Summary: Natural language processing playground
  Requires-Python: >=3.12
  Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: langchain<1,>=0.3
  Requires-Dist: langchain-openai<1,>=0.3
  Requires-Dist: opencv-python<5,>=4.10
  Requires-Dist: asyncpg<1,>=0.30
+ Requires-Dist: aiosqlite<1,>=0.20
 
  # nl_processing
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_neon_exercise.py
@@ -90,3 +90,30 @@ async def mark_event(
          await conn.execute(mark_event_applied_query(table), event_id)
      except asyncpg.PostgresError as exc:
          raise DatabaseError(str(exc)) from exc
+
+
+ async def atomic_apply_delta(
+     conn: asyncpg.Connection,  # type: ignore[type-arg]
+     score_table: str,
+     events_table: str,
+     user_id: str,
+     event_id: str,
+     source_word_id: int,
+     delta: int,
+ ) -> bool:
+     """Atomically check-apply-mark a score delta in one transaction."""
+     try:
+         async with conn.transaction():
+             already = await conn.fetchrow(check_event_applied_query(events_table), event_id)
+             if already is not None:
+                 return False
+             await conn.fetchrow(
+                 increment_score_query(score_table),
+                 user_id,
+                 source_word_id,
+                 delta,
+             )
+             await conn.execute(mark_event_applied_query(events_table), event_id)
+             return True
+     except asyncpg.PostgresError as exc:
+         raise DatabaseError(str(exc)) from exc
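For orientation, a minimal calling sketch for the new helper, assuming an existing asyncpg connection and exercise tables already created by create_exercise_tables; the DSN source, table names, and IDs below are illustrative, not taken from the package:

import asyncio
import os
from uuid import uuid4

import asyncpg

from nl_processing.database.backend._neon_exercise import atomic_apply_delta


async def main() -> None:
    conn = await asyncpg.connect(dsn=os.environ["DATABASE_URL"])  # hypothetical env var
    applied = await atomic_apply_delta(
        conn,
        score_table="user_exercise_scores_translate",  # illustrative table names
        events_table="applied_events",
        user_id="user-1",
        event_id=str(uuid4()),  # deduplication key; replaying the same id returns False
        source_word_id=42,
        delta=1,
    )
    print(applied)  # True on first apply, False if the event was already applied
    await conn.close()


asyncio.run(main())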
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_queries.py
@@ -133,6 +133,19 @@ def get_user_words_query(
      return query
 
 
+ def count_user_words_query(language: str, word_type: str | None) -> str:
+     # Table name from Language enum value, not user input  # noqa: S608
+     query = f"""
+         SELECT COUNT(*) AS cnt
+         FROM user_words uw
+         JOIN words_{language} w ON uw.word_id = w.id
+         WHERE uw.user_id = $1 AND uw.language = $2
+     """  # noqa: S608
+     if word_type is not None:
+         query += " AND w.word_type = $3"
+     return query
+
+
  def increment_score_query(table: str) -> str:
      # Table name from Language enum values, not user input  # noqa: S608
      return f"""
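A quick way to see what the builder emits is to print the generated SQL; a small sketch with no database required (the word_type value is illustrative):

from nl_processing.database.backend._queries import count_user_words_query

# Two placeholders ($1 user_id, $2 language) when no word_type filter is given.
print(count_user_words_query("nl", None))

# The same query with " AND w.word_type = $3" appended when a filter is passed.
print(count_user_words_query("nl", "noun"))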
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/abstract.py
@@ -53,6 +53,15 @@ class AbstractBackend(ABC):
          and random ordering.
          """
 
+     @abstractmethod
+     async def count_user_words(
+         self,
+         user_id: str,
+         language: str,
+         word_type: str | None = None,
+     ) -> int:
+         """Return total user-word associations for the given user and language."""
+
      @abstractmethod
      async def add_user_word(
          self,
@@ -103,6 +112,22 @@ class AbstractBackend(ABC):
      ) -> None:
          """Insert event_id into the applied_events table."""
 
+     @abstractmethod
+     async def apply_score_delta_atomic(
+         self,
+         score_table: str,
+         events_table: str,
+         user_id: str,
+         event_id: str,
+         source_word_id: int,
+         delta: int,
+     ) -> bool:
+         """Atomically check-apply-mark a score delta in one transaction.
+
+         Returns True if the delta was applied, False if event_id was already applied.
+         The entire operation (check + increment + mark) runs in a single transaction.
+         """
+
      @abstractmethod
      async def create_tables(
          self,
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/neon.py
@@ -1,8 +1,9 @@
- """NeonBackend asyncpg implementation of AbstractBackend for Neon PostgreSQL."""
+ """NeonBackend asyncpg implementation for Neon PostgreSQL."""
 
  import asyncpg
 
  from nl_processing.database.backend._neon_exercise import (
+     atomic_apply_delta,
      check_event,
      create_exercise_tables,
      get_scores,
@@ -14,6 +15,7 @@ from nl_processing.database.backend._queries import (
      CREATE_USER_WORDS,
      add_translation_link_query,
      add_word_query,
+     count_user_words_query,
      create_translations_table,
      create_words_table,
      get_user_words_query,
@@ -34,7 +36,6 @@ class NeonBackend(AbstractBackend):
          self._connection: asyncpg.Connection | None = None  # type: ignore[type-arg]
 
      async def _connect(self) -> asyncpg.Connection:  # type: ignore[type-arg]
-         """Return cached connection, creating it lazily on first call."""
          if self._connection is None:
              try:
                  self._connection = await asyncpg.connect(dsn=self._database_url)
@@ -43,13 +44,12 @@ class NeonBackend(AbstractBackend):
                  raise DatabaseError(str(exc)) from exc
              except OSError as exc:
                  raise DatabaseError(str(exc)) from exc
+         if self._connection is None:
+             raise DatabaseError("Database connection was not initialized")
          return self._connection
 
      async def create_tables(
-         self,
-         languages: list[str],
-         pairs: list[tuple[str, str]],
-         exercise_slugs: list[str],
+         self, languages: list[str], pairs: list[tuple[str, str]], exercise_slugs: list[str]
      ) -> None:
          conn = await self._connect()
          try:
@@ -78,11 +78,7 @@ class NeonBackend(AbstractBackend):
              return None
          return int(row["id"])
 
-     async def get_word(
-         self,
-         table: str,
-         normalized_form: str,
-     ) -> dict[str, str | int] | None:
+     async def get_word(self, table: str, normalized_form: str) -> dict[str, str | int] | None:
          conn = await self._connect()
          try:
              row = await conn.fetchrow(get_word_query(table), normalized_form)
@@ -96,24 +92,14 @@ class NeonBackend(AbstractBackend):
              "word_type": row["word_type"],
          }
 
-     async def add_translation_link(
-         self,
-         table: str,
-         source_id: int,
-         target_id: int,
-     ) -> None:
+     async def add_translation_link(self, table: str, source_id: int, target_id: int) -> None:
          conn = await self._connect()
          try:
              await conn.execute(add_translation_link_query(table), source_id, target_id)
          except asyncpg.PostgresError as exc:
              raise DatabaseError(str(exc)) from exc
 
-     async def add_user_word(
-         self,
-         user_id: str,
-         word_id: int,
-         language: str,
-     ) -> None:
+     async def add_user_word(self, user_id: str, word_id: int, language: str) -> None:
          conn = await self._connect()
          try:
              await conn.execute(ADD_USER_WORD, user_id, word_id, language)
@@ -144,6 +130,19 @@ class NeonBackend(AbstractBackend):
              raise DatabaseError(str(exc)) from exc
          return [dict(row) for row in rows]
 
+     async def count_user_words(self, user_id: str, language: str, word_type: str | None = None) -> int:
+         conn = await self._connect()
+         args: list[str] = [user_id, language]
+         if word_type is not None:
+             args.append(word_type)
+         try:
+             count = await conn.fetchval(count_user_words_query(language, word_type), *args)
+         except asyncpg.PostgresError as exc:
+             raise DatabaseError(str(exc)) from exc
+         if count is None:
+             return 0
+         return int(count)
+
      async def increment_user_exercise_score(
          self,
          table: str,
@@ -155,36 +154,29 @@ class NeonBackend(AbstractBackend):
          return await increment_score(conn, table, user_id, source_word_id, delta)
 
      async def get_user_exercise_scores(
-         self,
-         table: str,
-         user_id: str,
-         source_word_ids: list[int],
+         self, table: str, user_id: str, source_word_ids: list[int]
      ) -> list[dict[str, str | int]]:
          conn = await self._connect()
          return await get_scores(conn, table, user_id, source_word_ids)
 
-     async def check_event_applied(
-         self,
-         table: str,
-         event_id: str,
-     ) -> bool:
+     async def check_event_applied(self, table: str, event_id: str) -> bool:
          conn = await self._connect()
          return await check_event(conn, table, event_id)
 
-     async def mark_event_applied(
-         self,
-         table: str,
-         event_id: str,
-     ) -> None:
+     async def mark_event_applied(self, table: str, event_id: str) -> None:
          conn = await self._connect()
          await mark_event(conn, table, event_id)
 
+     async def apply_score_delta_atomic(
+         self, score_table: str, events_table: str,
+         user_id: str, event_id: str, source_word_id: int, delta: int,
+     ) -> bool:  # fmt: skip
+         conn = await self._connect()
+         return await atomic_apply_delta(conn, score_table, events_table, user_id, event_id, source_word_id, delta)
 
- def _infer_target_language(source_language: str) -> str:
-     """Infer the target language for translation lookups.
 
-     With only two languages (nl, ru), the target is always the other one.
-     """
+ def _infer_target_language(source_language: str) -> str:
+     """Infer the other language in the nl/ru pair."""
      if source_language == "nl":
          return "ru"
      return "nl"
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exercise_progress.py
@@ -119,25 +119,21 @@ class ExerciseProgressStore:
      ) -> None:
          """Apply a score delta idempotently, guarded by event deduplication.
 
-         Validates exercise_type. Skips if event_id was already applied.
+         Validates exercise_type and delta. Skips if event_id was already applied.
+         The check-increment-mark operation is atomic (single transaction).
          """
          self._validate_exercise_type(exercise_type)
-         already_applied = await self._backend.check_event_applied(
-             self._applied_events_table,
-             event_id,
-         )
-         if already_applied:
-             return
+         if delta not in (1, -1):
+             msg = f"delta must be +1 or -1, got {delta}"
+             raise ValueError(msg)
          table = self._score_tables[exercise_type]
-         await self._backend.increment_user_exercise_score(
-             table,
-             self._user_id,
-             source_word_id,
-             delta,
-         )
-         await self._backend.mark_event_applied(
-             self._applied_events_table,
-             event_id,
+         await self._backend.apply_score_delta_atomic(
+             score_table=table,
+             events_table=self._applied_events_table,
+             user_id=self._user_id,
+             event_id=event_id,
+             source_word_id=source_word_id,
+             delta=delta,
          )
 
      def _validate_exercise_type(self, exercise_type: str) -> None:
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/service.py
@@ -140,6 +140,19 @@ class DatabaseService:
                  language=self._target_language,
              )
              pairs.append(WordPair(source=source, target=target))
+         if limit is None and not random:
+             total_count = await self._backend.count_user_words(
+                 self._user_id,
+                 self._source_language.value,
+                 word_type=word_type.value if word_type else None,
+             )
+             if total_count > len(pairs):
+                 excluded_count = total_count - len(pairs)
+                 _logger.warning(
+                     "%d of %d words excluded from get_words() due to missing translations",
+                     excluded_count,
+                     total_count,
+                 )
          return pairs
 
      @classmethod
nl_processing-0.4.0/nl_processing/database_cache/_local_store_queries.py
@@ -0,0 +1,58 @@
+ """DDL and query constants for the local SQLite cache store."""
+
+ DDL_CACHED_WORD_PAIRS = """
+ CREATE TABLE IF NOT EXISTS cached_word_pairs (
+     source_word_id INTEGER PRIMARY KEY,
+     source_normalized_form TEXT NOT NULL,
+     source_word_type TEXT NOT NULL,
+     target_word_id INTEGER NOT NULL,
+     target_normalized_form TEXT NOT NULL,
+     target_word_type TEXT NOT NULL
+ )"""
+
+ DDL_CACHED_SCORES = """
+ CREATE TABLE IF NOT EXISTS cached_scores (
+     source_word_id INTEGER NOT NULL,
+     exercise_type TEXT NOT NULL,
+     score INTEGER NOT NULL DEFAULT 0,
+     updated_at TEXT NOT NULL,
+     PRIMARY KEY (source_word_id, exercise_type)
+ )"""
+
+ DDL_PENDING_SCORE_EVENTS = """
+ CREATE TABLE IF NOT EXISTS pending_score_events (
+     event_id TEXT PRIMARY KEY,
+     source_word_id INTEGER NOT NULL,
+     exercise_type TEXT NOT NULL,
+     delta INTEGER NOT NULL,
+     created_at TEXT NOT NULL,
+     flushed_at TEXT,
+     last_error TEXT
+ )"""
+
+ DDL_CACHE_METADATA = """
+ CREATE TABLE IF NOT EXISTS cache_metadata (
+     id INTEGER PRIMARY KEY DEFAULT 1,
+     exercise_types TEXT NOT NULL,
+     schema_version INTEGER NOT NULL DEFAULT 1,
+     last_refresh_started_at TEXT,
+     last_refresh_completed_at TEXT,
+     last_flush_completed_at TEXT,
+     last_error TEXT
+ )"""
+
+ ALL_DDL = [DDL_CACHED_WORD_PAIRS, DDL_CACHED_SCORES, DDL_PENDING_SCORE_EVENTS, DDL_CACHE_METADATA]
+
+ UPSERT_SCORE = (
+     "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
+     " ON CONFLICT(source_word_id, exercise_type) DO UPDATE SET score = score + ?, updated_at = ?"
+ )
+
+ INSERT_PENDING_EVENT = (
+     "INSERT INTO pending_score_events (event_id, source_word_id, exercise_type, delta, created_at)"
+     " VALUES (?, ?, ?, ?, ?)"
+ )
+
+ INSERT_WORD_PAIR = "INSERT INTO cached_word_pairs VALUES (?, ?, ?, ?, ?, ?)"
+
+ INSERT_SCORE = "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
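Note that UPSERT_SCORE binds the delta and timestamp twice: once as the initial score for a new row and once for the ON CONFLICT increment. A standalone sqlite3 sketch of that behaviour, using the constants above (the exercise type is illustrative):

import sqlite3
from datetime import UTC, datetime

from nl_processing.database_cache._local_store_queries import DDL_CACHED_SCORES, UPSERT_SCORE

conn = sqlite3.connect(":memory:")
conn.execute(DDL_CACHED_SCORES)
now = datetime.now(tz=UTC).isoformat()
# Parameter order: (source_word_id, exercise_type, score, updated_at, delta, updated_at)
conn.execute(UPSERT_SCORE, (42, "translate", 1, now, 1, now))  # inserts score = 1
conn.execute(UPSERT_SCORE, (42, "translate", 1, now, 1, now))  # conflicts, score becomes 2
print(conn.execute("SELECT score FROM cached_scores").fetchone()[0])  # 2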
nl_processing-0.4.0/nl_processing/database_cache/exceptions.py
@@ -0,0 +1,10 @@
+ class CacheNotReadyError(Exception):
+     """Raised when cached data is requested before the first usable snapshot exists."""
+
+
+ class CacheStorageError(Exception):
+     """Raised when the local SQLite cache file cannot be opened, read, or updated."""
+
+
+ class CacheSyncError(Exception):
+     """Raised when an explicit refresh or flush operation fails synchronously."""
nl_processing-0.4.0/nl_processing/database_cache/local_store.py
@@ -0,0 +1,200 @@
+ """SQLite data-access layer for the local word-pair / score cache."""
+
+ from datetime import UTC, datetime
+ import json
+ import sqlite3
+
+ import aiosqlite
+
+ from nl_processing.database_cache._local_store_queries import (
+     ALL_DDL,
+     INSERT_PENDING_EVENT,
+     INSERT_SCORE,
+     INSERT_WORD_PAIR,
+     UPSERT_SCORE,
+ )
+ from nl_processing.database_cache.exceptions import CacheStorageError
+
+
+ def _now() -> str:
+     return datetime.now(tz=UTC).isoformat()
+
+
+ class LocalStore:
+     """Async SQLite store for cached word pairs, scores, and pending events."""
+
+     def __init__(self, db_path: str) -> None:
+         self._db_path = db_path
+         self._db: aiosqlite.Connection | None = None
+
+     @property
+     def _conn(self) -> aiosqlite.Connection:
+         if self._db is None:
+             raise CacheStorageError("LocalStore is not open")
+         return self._db
+
+     async def open(self) -> None:
+         """Open the SQLite connection and create tables."""
+         try:
+             self._db = await aiosqlite.connect(self._db_path)
+             self._db.row_factory = aiosqlite.Row
+             await self._db.execute("PRAGMA journal_mode=WAL")
+             for ddl in ALL_DDL:
+                 await self._db.execute(ddl)
+             await self._db.commit()
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def close(self) -> None:
+         """Close the SQLite connection."""
+         if self._db:
+             await self._db.close()
+             self._db = None
+
+     async def get_cached_word_pairs(
+         self,
+         word_type: str | None = None,
+         limit: int | None = None,
+         *,
+         random: bool = False,
+     ) -> list[dict[str, str | int]]:
+         """Query cached word pairs with optional filter, limit, and random ordering."""
+         sql = "SELECT * FROM cached_word_pairs"
+         params: list[str | int] = []
+         if word_type is not None:
+             sql += " WHERE source_word_type = ?"
+             params.append(word_type)
+         if random:
+             sql += " ORDER BY RANDOM()"
+         if limit is not None:
+             sql += " LIMIT ?"
+             params.append(limit)
+         return await self._fetch_all(sql, params)
+
+     async def get_cached_word_pairs_with_scores(self, exercise_types: list[str]) -> list[dict[str, str | int]]:
+         """Query word pairs and attach scores per exercise type (missing = 0)."""
+         try:
+             rows = await self._fetch_all("SELECT * FROM cached_word_pairs")
+             for row in rows:
+                 for et in exercise_types:
+                     sc = await self._conn.execute(
+                         "SELECT score FROM cached_scores WHERE source_word_id=? AND exercise_type=?",
+                         (row["source_word_id"], et),
+                     )
+                     score_row = await sc.fetchone()
+                     row[f"score_{et}"] = int(score_row["score"]) if score_row else 0
+             return rows
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def get_pending_events(self) -> list[dict[str, str | int]]:
+         return await self._fetch_all("SELECT * FROM pending_score_events WHERE flushed_at IS NULL ORDER BY created_at")
+
+     async def get_pending_event_count(self) -> int:
+         try:
+             cur = await self._conn.execute("SELECT COUNT(*) FROM pending_score_events WHERE flushed_at IS NULL")
+             row = await cur.fetchone()
+             return int(row[0]) if row else 0
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def get_metadata(self) -> dict[str, str | int] | None:
+         try:
+             cur = await self._conn.execute("SELECT * FROM cache_metadata WHERE id = 1")
+             row = await cur.fetchone()
+             return dict(row) if row else None
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def has_snapshot(self) -> bool:
+         try:
+             cur = await self._conn.execute("SELECT 1 FROM cached_word_pairs LIMIT 1")
+             return (await cur.fetchone()) is not None
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def record_score_and_event(
+         self,
+         source_word_id: int,
+         exercise_type: str,
+         delta: int,
+         event_id: str,
+     ) -> None:
+         """Atomically upsert a cached score and insert a pending event."""
+         now = _now()
+         try:
+             await self._conn.execute(UPSERT_SCORE, (source_word_id, exercise_type, delta, now, delta, now))
+             await self._conn.execute(INSERT_PENDING_EVENT, (event_id, source_word_id, exercise_type, delta, now))
+             await self._conn.commit()
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def rebuild_snapshot(
+         self,
+         word_pairs: list[tuple[int, str, str, int, str, str]],
+         scores: dict[tuple[int, str], int],
+     ) -> None:
+         """Atomically replace cached word pairs and scores, then reapply pending events."""
+         now = _now()
+         try:
+             await self._conn.execute("DELETE FROM cached_word_pairs")
+             await self._conn.execute("DELETE FROM cached_scores")
+             for wp in word_pairs:
+                 await self._conn.execute(INSERT_WORD_PAIR, wp)
+             for (wid, et), score in scores.items():
+                 await self._conn.execute(INSERT_SCORE, (wid, et, score, now))
+             for evt in await self.get_pending_events():
+                 await self._conn.execute(
+                     UPSERT_SCORE,
+                     (evt["source_word_id"], evt["exercise_type"], evt["delta"], now, evt["delta"], now),
+                 )
+             await self._conn.commit()
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def mark_event_flushed(self, event_id: str) -> None:
+         await self._exec_commit("UPDATE pending_score_events SET flushed_at=? WHERE event_id=?", (_now(), event_id))
+
+     async def mark_event_failed(self, event_id: str, error: str) -> None:
+         await self._exec_commit("UPDATE pending_score_events SET last_error=? WHERE event_id=?", (error, event_id))
+
+     async def update_metadata(self, **fields: str | int | None) -> None:
+         if not fields:
+             return
+         set_clause = ", ".join(f"{k} = ?" for k in fields)
+         await self._exec_commit(
+             f"UPDATE cache_metadata SET {set_clause} WHERE id = 1",  # noqa: S608
+             tuple(fields.values()),
+         )
+
+     async def ensure_metadata(self, exercise_types: list[str]) -> None:
+         await self._exec_commit(
+             "INSERT OR REPLACE INTO cache_metadata (id, exercise_types, schema_version) VALUES (1, ?, 1)",
+             (json.dumps(exercise_types),),
+         )
+
+     async def get_source_word_id(self, normalized_form: str, word_type: str) -> int | None:
+         """Look up a source_word_id from cached_word_pairs."""
+         try:
+             cur = await self._conn.execute(
+                 "SELECT source_word_id FROM cached_word_pairs WHERE source_normalized_form=? AND source_word_type=?",
+                 (normalized_form, word_type),
+             )
+             row = await cur.fetchone()
+             return int(row["source_word_id"]) if row else None
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def _fetch_all(self, sql: str, params: list[str | int] | None = None) -> list[dict[str, str | int]]:
+         try:
+             cur = await self._conn.execute(sql, params or [])
+             return [dict(row) for row in await cur.fetchall()]
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
+
+     async def _exec_commit(self, sql: str, params: tuple[str | int | None, ...]) -> None:
+         try:
+             await self._conn.execute(sql, params)
+             await self._conn.commit()
+         except sqlite3.Error as exc:
+             raise CacheStorageError(str(exc)) from exc
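A minimal end-to-end sketch of the store against a throwaway database file; the word-pair tuple follows the INSERT_WORD_PAIR column order (source id, form, type, target id, form, type), and the path, word forms, and exercise type are illustrative:

import asyncio

from nl_processing.database_cache.local_store import LocalStore


async def main() -> None:
    store = LocalStore("/tmp/nl_cache_example.db")  # illustrative path
    await store.open()
    await store.rebuild_snapshot(
        word_pairs=[(1, "huis", "noun", 2, "дом", "noun")],
        scores={(1, "translate"): 3},
    )
    # Bumps the cached score and queues a pending event for the remote flush.
    await store.record_score_and_event(1, "translate", 1, "event-1")
    print(await store.get_cached_word_pairs_with_scores(["translate"]))  # score_translate == 4
    print(await store.get_pending_event_count())  # 1 until mark_event_flushed() is called
    await store.close()


asyncio.run(main())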
nl_processing-0.4.0/nl_processing/database_cache/logging.py
@@ -0,0 +1,5 @@
+ import logging
+
+
+ def get_logger(name: str) -> logging.Logger:
+     return logging.getLogger(f"nl_processing.database_cache.{name}")
nl_processing-0.4.0/nl_processing/database_cache/models.py
@@ -0,0 +1,12 @@
+ from datetime import datetime
+
+ from pydantic import BaseModel
+
+
+ class CacheStatus(BaseModel):
+     is_ready: bool
+     is_stale: bool
+     has_snapshot: bool
+     pending_events: int
+     last_refresh_completed_at: datetime | None
+     last_flush_completed_at: datetime | None
nl_processing-0.4.0/nl_processing/database_cache/service.py
@@ -0,0 +1,185 @@
+ """DatabaseCacheService — public API for the local SQLite cache layer."""
+
+ import asyncio
+ from datetime import UTC, datetime, timedelta
+ import json
+ import tempfile
+ from uuid import uuid4
+
+ from nl_processing.core.models import Language, PartOfSpeech, Word
+ from nl_processing.database.exercise_progress import ExerciseProgressStore
+ from nl_processing.database.models import ScoredWordPair, WordPair
+ from nl_processing.database_cache.exceptions import CacheNotReadyError
+ from nl_processing.database_cache.local_store import LocalStore
+ from nl_processing.database_cache.logging import get_logger
+ from nl_processing.database_cache.models import CacheStatus
+ from nl_processing.database_cache.sync import CacheSyncer
+
+ _log = get_logger("service")
+
+
+ class DatabaseCacheService:
+     """Offline-first cache backed by a local SQLite database."""
+
+     def __init__(
+         self,
+         *,
+         user_id: str,
+         source_language: Language,
+         target_language: Language,
+         exercise_types: list[str],
+         cache_ttl: timedelta,
+         cache_dir: str | None = None,
+     ) -> None:
+         if not exercise_types:
+             msg = "exercise_types must be a non-empty list"
+             raise ValueError(msg)
+         self._user_id = user_id
+         self._source_language = source_language
+         self._target_language = target_language
+         self._exercise_types = list(exercise_types)
+         self._cache_ttl = cache_ttl
+         base = cache_dir or tempfile.gettempdir()
+         self._db_path = f"{base}/{user_id}_{source_language.value}_{target_language.value}.db"
+         self._initialized = False
+         self._local: LocalStore | None = None
+         self._syncer: CacheSyncer | None = None
+
+     async def init(self) -> CacheStatus:
+         """Open local store, bootstrap or refresh as needed, return status."""
+         progress_store = ExerciseProgressStore(
+             user_id=self._user_id,
+             source_language=self._source_language,
+             target_language=self._target_language,
+             exercise_types=self._exercise_types,
+         )
+         self._local = LocalStore(self._db_path)
+         await self._local.open()
+         self._syncer = CacheSyncer(self._local, progress_store)
+         await self._local.ensure_metadata(self._exercise_types)
+         meta = await self._local.get_metadata()
+         if meta and json.loads(str(meta["exercise_types"])) != self._exercise_types:
+             await self._local.ensure_metadata(self._exercise_types)
+             await self._syncer.refresh()
+         elif not await self._local.has_snapshot():
+             await self._syncer.refresh()
+         elif self._is_stale(meta):
+             asyncio.create_task(self._background_refresh())
+         self._initialized = True
+         return await self.get_status()
+
+     async def get_words(
+         self,
+         *,
+         word_type: str | None = None,
+         limit: int | None = None,
+         random: bool = False,
+     ) -> list[WordPair]:
+         """Return cached word pairs, optionally filtered."""
+         self._ensure_ready()
+         assert self._local is not None
+         rows = await self._local.get_cached_word_pairs(word_type=word_type, limit=limit, random=random)
+         return [self._row_to_word_pair(r) for r in rows]
+
+     async def get_word_pairs_with_scores(self) -> list[ScoredWordPair]:
+         """Return cached word pairs with exercise scores."""
+         self._ensure_ready()
+         assert self._local is not None
+         rows = await self._local.get_cached_word_pairs_with_scores(self._exercise_types)
+         result: list[ScoredWordPair] = []
+         for row in rows:
+             pair = self._row_to_word_pair(row)
+             scores = {et: int(row[f"score_{et}"]) for et in self._exercise_types}
+             result.append(ScoredWordPair(pair=pair, scores=scores, source_word_id=int(row["source_word_id"])))
+         return result
+
+     async def record_exercise_result(self, *, source_word: Word, exercise_type: str, delta: int) -> None:
+         """Record a score change locally and queue for remote flush."""
+         self._ensure_ready()
+         assert self._local is not None
+         if exercise_type not in self._exercise_types:
+             msg = f"Unknown exercise_type '{exercise_type}'; expected one of {sorted(self._exercise_types)}"
+             raise ValueError(msg)
+         if delta not in (1, -1):
+             msg = f"delta must be +1 or -1, got {delta}"
+             raise ValueError(msg)
+         wid = await self._local.get_source_word_id(source_word.normalized_form, source_word.word_type.value)
+         if wid is None:
+             msg = f"Word '{source_word.normalized_form}' not found in cache"
+             raise ValueError(msg)
+         await self._local.record_score_and_event(wid, exercise_type, delta, str(uuid4()))
+         asyncio.create_task(self._background_flush())
+
+     async def refresh(self) -> None:
+         """Trigger a full cache refresh from the remote database."""
+         assert self._syncer is not None
+         await self._syncer.refresh()
+
+     async def flush(self) -> None:
+         """Flush pending score events to the remote database."""
+         assert self._syncer is not None
+         await self._syncer.flush()
+
+     async def get_status(self) -> CacheStatus:
+         """Build current cache status from metadata and pending events."""
+         assert self._local is not None
+         meta = await self._local.get_metadata()
+         has_snap = await self._local.has_snapshot()
+         pending = await self._local.get_pending_event_count()
+         last_refresh = _parse_dt(meta, "last_refresh_completed_at") if meta else None
+         last_flush = _parse_dt(meta, "last_flush_completed_at") if meta else None
+         return CacheStatus(
+             is_ready=self._initialized and has_snap,
+             is_stale=self._is_stale(meta),
+             has_snapshot=has_snap,
+             pending_events=pending,
+             last_refresh_completed_at=last_refresh,
+             last_flush_completed_at=last_flush,
+         )
+
+     def _ensure_ready(self) -> None:
+         if not self._initialized or self._local is None:
+             raise CacheNotReadyError("Cache not initialized — call init() first")
+
+     def _is_stale(self, meta: dict[str, str | int] | None) -> bool:
+         if not meta:
+             return True
+         last_refresh = _parse_dt(meta, "last_refresh_completed_at")
+         if last_refresh is None:
+             return True
+         return datetime.now(tz=UTC) - last_refresh > self._cache_ttl
+
+     def _row_to_word_pair(self, row: dict[str, str | int]) -> WordPair:
+         return WordPair(
+             source=Word(
+                 normalized_form=str(row["source_normalized_form"]),
+                 word_type=PartOfSpeech(row["source_word_type"]),
+                 language=self._source_language,
+             ),
+             target=Word(
+                 normalized_form=str(row["target_normalized_form"]),
+                 word_type=PartOfSpeech(row["target_word_type"]),
+                 language=self._target_language,
+             ),
+         )
+
+     async def _background_refresh(self) -> None:
+         try:
+             assert self._syncer is not None
+             await self._syncer.refresh()
+         except Exception:
+             _log.exception("background refresh failed")
+
+     async def _background_flush(self) -> None:
+         try:
+             assert self._syncer is not None
+             await self._syncer.flush(skip_if_running=True)
+         except Exception:
+             _log.exception("background flush failed")
+
+
+ def _parse_dt(meta: dict[str, str | int], key: str) -> datetime | None:
+     val = meta[key] if key in meta else None
+     if val is None:
+         return None
+     return datetime.fromisoformat(str(val))
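A usage sketch of the new service; the exercise type slug and TTL are illustrative, and init() and flush() assume the remote Postgres used by ExerciseProgressStore is reachable:

import asyncio
from datetime import timedelta

from nl_processing.core.models import Language
from nl_processing.database_cache.service import DatabaseCacheService


async def main() -> None:
    svc = DatabaseCacheService(
        user_id="user-1",
        source_language=Language.NL,
        target_language=Language.RU,
        exercise_types=["translate"],  # illustrative slug
        cache_ttl=timedelta(hours=12),
    )
    status = await svc.init()  # bootstraps or refreshes the local SQLite snapshot
    print(status.is_ready, status.pending_events)
    pairs = await svc.get_words(limit=10, random=True)
    if pairs:
        # Recorded locally first, then flushed to the remote database.
        await svc.record_exercise_result(source_word=pairs[0].source, exercise_type="translate", delta=1)
    await svc.flush()


asyncio.run(main())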
nl_processing-0.4.0/nl_processing/database_cache/sync.py
@@ -0,0 +1,82 @@
+ """Refresh / flush orchestration for the local cache."""
+
+ import asyncio
+ from datetime import UTC, datetime
+
+ from nl_processing.database.exercise_progress import ExerciseProgressStore
+ from nl_processing.database_cache.exceptions import CacheSyncError
+ from nl_processing.database_cache.local_store import LocalStore
+ from nl_processing.database_cache.logging import get_logger
+
+ _log = get_logger("sync")
+
+
+ class CacheSyncer:
+     """Coordinates full refresh from remote and flush of pending events back to remote."""
+
+     def __init__(self, local_store: LocalStore, progress_store: ExerciseProgressStore) -> None:
+         self._local = local_store
+         self._remote = progress_store
+         self._refresh_lock = asyncio.Lock()
+         self._flush_lock = asyncio.Lock()
+
+     async def refresh(self) -> None:
+         """Pull a full snapshot from the remote database and rebuild the local cache."""
+         if self._refresh_lock.locked():
+             return
+         async with self._refresh_lock:
+             now = datetime.now(tz=UTC).isoformat()
+             try:
+                 await self._local.update_metadata(last_refresh_started_at=now)
+                 scored_pairs = await self._remote.export_remote_snapshot()
+                 word_pairs: list[tuple[int, str, str, int, str, str]] = [
+                     (
+                         sp.source_word_id,
+                         sp.pair.source.normalized_form,
+                         sp.pair.source.word_type.value,
+                         0,
+                         sp.pair.target.normalized_form,
+                         sp.pair.target.word_type.value,
+                     )
+                     for sp in scored_pairs
+                 ]
+                 scores: dict[tuple[int, str], int] = {}
+                 for sp in scored_pairs:
+                     for exercise_type, score in sp.scores.items():
+                         scores[(sp.source_word_id, exercise_type)] = score
+                 await self._local.rebuild_snapshot(word_pairs, scores)
+                 await self._local.update_metadata(
+                     last_refresh_completed_at=datetime.now(tz=UTC).isoformat(),
+                 )
+             except CacheSyncError:
+                 raise
+             except Exception as exc:
+                 _log.exception("refresh failed")
+                 await self._local.update_metadata(last_error=str(exc))
+                 raise CacheSyncError(str(exc)) from exc
+
+     async def flush(self, *, skip_if_running: bool = False) -> None:
+         """Push pending local score events to the remote database.
+
+         Args:
+             skip_if_running: If True, return immediately if another flush is already running.
+                 If False (default), wait for any running flush to complete.
+         """
+         if skip_if_running and self._flush_lock.locked():
+             return
+         async with self._flush_lock:
+             events = await self._local.get_pending_events()
+             for evt in events:
+                 eid = str(evt["event_id"])
+                 try:
+                     await self._remote.apply_score_delta(
+                         event_id=eid,
+                         source_word_id=int(evt["source_word_id"]),
+                         exercise_type=str(evt["exercise_type"]),
+                         delta=int(evt["delta"]),
+                     )
+                     await self._local.mark_event_flushed(eid)
+                 except Exception as exc:
+                     _log.warning("flush failed for event %s: %s", eid, exc)
+                     await self._local.mark_event_failed(eid, str(exc))
+             await self._local.update_metadata(last_flush_completed_at=datetime.now(tz=UTC).isoformat())
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py
@@ -6,7 +6,7 @@ Usage:
  This script:
  1. Generates synthetic test images and encodes real photos
  2. Encodes them to base64
- 3. Builds a ChatPromptTemplate with 5 few-shot examples (HumanMessage + AIMessage + ToolMessage triplets)
+ 3. Builds a ChatPromptTemplate with 7 few-shot examples (HumanMessage + AIMessage + ToolMessage triplets)
  4. Serializes with dumpd() and saves to nl.json
 
  The script is the source of truth — nl.json is the generated artifact.
@@ -92,6 +92,9 @@ EXAMPLE_5_EXPECTED = ""
  EXAMPLE_6_TEXT = "Please take your shoes off before entering the house"
  EXAMPLE_6_EXPECTED = ""
 
+ EXAMPLE_7_TEXT = "Remember to bring your umbrella tomorrow"
+ EXAMPLE_7_EXPECTED = ""
+
  OUTPUT_PATH = Path(__file__).parent / "nl.json"
 
 
@@ -128,13 +131,14 @@ def _make_example_ai(expected_text: str, call_id: str) -> AIMessage:
 
 
  def build_prompt() -> ChatPromptTemplate:
-     """Build the Dutch extraction prompt with 6 few-shot examples."""
+     """Build the Dutch extraction prompt with 7 few-shot examples."""
      img1 = _generate_image_b64(EXAMPLE_1_TEXT)
      img2 = _generate_image_b64(EXAMPLE_2_TEXT)
      img3 = _encode_existing_image_b64(EXAMPLE_3_IMAGE)
      img4 = _encode_existing_image_b64(EXAMPLE_4_IMAGE)
      img5 = _generate_image_b64(EXAMPLE_5_TEXT)
      img6 = _generate_image_b64(EXAMPLE_6_TEXT)
+     img7 = _generate_image_b64(EXAMPLE_7_TEXT)
 
      return ChatPromptTemplate.from_messages([
          SystemMessage(content=SYSTEM_INSTRUCTION),
@@ -156,6 +160,9 @@ def build_prompt() -> ChatPromptTemplate:
          _make_example_human(img6),
          _make_example_ai(EXAMPLE_6_EXPECTED, "call_example_6"),
          ToolMessage(content=EXAMPLE_6_EXPECTED, tool_call_id="call_example_6"),
+         _make_example_human(img7),
+         _make_example_ai(EXAMPLE_7_EXPECTED, "call_example_7"),
+         ToolMessage(content=EXAMPLE_7_EXPECTED, tool_call_id="call_example_7"),
          MessagesPlaceholder(variable_name="images"),
      ])
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/nl.json
@@ -398,6 +398,68 @@
  "status": "success"
  }
  },
+ {
+ "lc": 1,
+ "type": "constructor",
+ "id": [
+ "langchain",
+ "schema",
+ "messages",
+ "HumanMessage"
+ ],
+ "kwargs": {
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAyAAAADICAIAAACf7RJNAAAXuklEQVR4Ae3BAYor14IFwcz9LzoHLggk1KXu51/tsfGJsGJmZmZm7mPFzMzMzNzHipmZmZm5jxUzMzMzcx8rZmZmZuY+VszMzMzMfayYmZmZmftYMTMzMzP3sWJmZmZm7mPFzMzMzNzHipmZmZm5jxUzMzMzcx8rZmZmZuY+VszMf4nKUTEzM7/Aipuo/EDF/D9RK/5L1Ip/NrXib6HypGLm30Ot+FdRKy6oFf9IasX8JVbcQeUPVczfSOWo+G9QOSr+qVSOit+n8qRi5l9C5aj4l1A5Kt6oHBX/MCpHxfw5K+6g8ucq5u+iclT8N6gcFf9UKkfF71N5UjHzL6FyVPxLqBwVb1SOin8YlaNi/pwVd1A5Kj5SeaiYv4vKUfHfoHJU/FOpHBV/C5WjYubfQ+Wo+JdQOSreqBwV/zAqR8X8OSvuoHJUfEfloWL+FipHxX+DylHxT6VyVMzMNZWj4l9C5ah4o3JU/MOoHBXz56y4g8pR8QMqR8X8LVSOiv8GlaPin0rlqJiZaypHxb+EylHxRuWo+IdROSrmz1lxB5Wj4mdUjor5fSpHxX+DylHxT6VyVMzMNZWj4l9C5ah4o3JU/MOoHBXz56y4g8pR8TMqR8X8PpWj4r9B5aj4p1I5KmbmmspR8S+hclS8UTkq/mFUjor5c1bcQeWo+BmVo+KayquK76gVDyqvKt6ovKr4EypPKr6jVjxReVLxFZUnFR+pHBWHypOKP6TyquI7asWDypOK/43KhYrvqLyq+DUqRwWovKr4AbXiQeVJxRO14oJa8aDyquJPqDypeFArbqJW/IxacU3lVcUPqBXfUSu+olY8UXlScQe14jtqxQW14onKk4qvqDyp+EjlqACVVxU/oFY8qDypuKbypOIjlaPijcpR8R2VJxWHWnEflQsV31F5UvEDasUTlScVX1F5VfEnVJ5U/IBa8UTliRV3UDkqfkDlqLigcq3igspRqVyrOFSuVXxH5ULFBZWjAlQuVDyoXKi4oHJUKhcqfkDlWsUFlaNS+UrFX6XyUcUFlWsVv0DlByquqRyVylcqDpWj4o3KUalcq/iOyoVK5aj4n6kcFd9ROSreqFyruKZyVFxTOSpeqRwVoPKViv+NylFxTeWoeKNyVIDKhYoHlQsVF1R+oOKaylGpfKXijcqFigsqR8UblaPimsp3Ku6g8lHFBZULFddUjgpQuVDxoHKt4jsqFyquqRwVoPLGijuoHBXfUXmo+IrKdyq+ovJjlcp3Kq6pfFTxFZWjUvmoAlQ+qviKys9UfKTynYqvqHxU8b9RuVBxQeU7FXdT+ZmKCyofVTyoHBVvVH6s4prKz1T8z1QeKq6pPFS8UvlOxQWVo+KaylHxSuWoVL5S8T9TOSquqRwVb1SOSuWjClD5qOIrKj9TcUHlo4o3Kh9VfEXlqHijclRcUPmBipuoXKi4oPJRxQWVo1L5qAJUvlNxTeWjigsqR6XyFSvuoHJUXFN5UvEVlScVDypPKt6ovKl4ULlQ8aDyUHFB5aHiicpDxRuVNxUPKg+VykPFE5WHijcqryoeVJ5UXFB5UvGg8qTijcqbil+gclR8pPKk4kHlScWtVF5VPKg8qfiKypuKr6gcFW9U3lQ8qDxUXFB5qHii8qriDipHxTWVo+KVypOKB5UnFV9ROSquqRwVr1TeVNxN5ai4pnJUvFF5U/Gg8lCpPFQ8qDxUfEXlVcWDypOKr6i8qbim8lDxoPKk4o3KUfFG5aj4ispDxYPKq4pbqRwV31F5qHii8lDxFZU3FU9ULlQ8qDxUXFB5qHhQeVLxFZU3FU+suIPKH6r4ispDxVdUjoo3Kk8q3qi8qnijclR8ReWh4isqR8UrlVcVb1ReVbxROSreqDypeKPyUPFG5aHiKypHxRuVJxW/RuWouKbyUPEVlYeK+6g8VHxF5aHijcqTimsqR8UblScVb1SOiq+oPFS8UXlScQeVo+KCykPFE5WHiq+oPFS8UTkqrqkcFa9UXlX8ApWj4prKUfFG5UnFG5U3FW9Ujoo3Kg8VX1F5qHij8qTimsqTijcqDxWvVI6KNypHxSuVJxVvVB4qbqVyVHyk8lDxRuVJxSuVVxVvVF5VvFE5Kt6oPKl4o/Kk4pXKq4pXVtxB5Q9VfEXloeKCylHxSuWh4isqTyouqBwVr1QeKi6oHBWvVJ5UfEXlScVXVI6KNyoPFRdUHipeqTxUXFA5Kl6pPFT8JpWj4prKUXFB5aHiPipHxQWVh4o3Kg8VH6kcFW9UHiouqBwVr1QeKi6oPFTcROWo+IrKUfFK5ai4oPJQ8UblqLimclS8UnlS8TtUjoprKkfFG5UnFV9ReVLxFZWj4o3KQ8UFlaPijcqTimsqDxUXVI6KVypHxRuVo+KVykPFBZWj4lYqR8U1lYeKCyoPFa9UnlR8ReVJxVdUHipeqTxUXFB5qHil8qTijRV3UPmBio9UHiquqRwVr1SOimsqR8U1laPilcpR8ZHKUfFE5aHigspDxQWVh4pXKkfFRypHxROVh4prKkfFK5WHit+kclRcUHmouKbyUHETlaPimspDxSuVh4qPVI6KNypHxTWVo+KVylHxkcpRcROVo+IrKkfFE5WHimsqDxWvVI6KaypHxSuVh4pfo3JUXFM5Kt6oPFRcUHmouKByVLxROSquqTxUvFJ5qPhI5aj4SOWoeKJyVLxROSpeqRwV11SOilupHBXXVI6Kj1SOilcqDxUXVB4qrqkcFa9UjoqPVI6KVyoPFV+x4g4qR8UblaPiI5U/VPFK5ai4pnJUXFM5Kl6p/KGKJypHxTWVo+IjlaPilcpR8ZHKUfFE5Q9VvFI5Kn6ZylFxQeWo+I7KUXETlaPiI5Wj4pXKUfEdlaPijcpRcU3lqHilclR8pHJU3ETloeKVylHxSuWo+I7KUfFK5ai4pnJUvFJ5qPg1KkfFNZWj4o3KUXFN5ai4pnJUvFE5Kj5SOSpeqRwVH6n8uYonKkfFG5Wj4onKQ8U1laPiVipHxTWVo+IjlYeKJypHxUcqR8U1laPiicpDxUcqDxVPVB4qvmLFHVSOijcqDxXXVP5QxSuVo+KaylFxTeWoeKLy5yqeqBwV11SOio9UjopXKkfFRyoPFQ8qf6jilcpR8ctUjooLKkfFd1SOipuoHBUfqRwVr1SOiu+oHBVvVI6KaypHxROVh4qPVI6K+6gcFa9UjopXKkfFd1SOilcqR8U1laPilcpDxa9ROSquqRwVb1SOimsqR8U1laPijcpR8ZHKUfFK5aj4SOXPVTxROSreqBwVT1SOio9UjopbqRwVF1QeKr6jclQ8UTkqPlI5Kq6pHBVPVI6KH1A5Kp6oHBUXrLiDylHxFZWj4prKn6h4o3JUXFM5Kq6pHBVPVP5QxSuVo+KaylHxkcpR8UrlqPiOylHxoPInKt6oHBW/TOWouKByVHxH5ai4icpR8ZHKUfFK5aj4jspR8UblqLimclQ
8UTkqvqNyVNxH5aHiQeWh4pXKUfEdlaPilcpRcU3lqHilclT8JpWj4prKUfFG5ai4pnJUXFM5Kt6oHBUfqRwVr1SOio9U/lDFK5Wj4o3KUfFE5aj4SOWouJXKUXFB5aj4AZWj4onKUfGRylFxTeWoeKJyVPyAylHxROWouGDFHVSOiq+oPFRcUDkq/iqVo+KaylFxTeWoeKJyVPxVKkfFNZWj4iOVo+KVylHxHZWj4kHlqPirVI6KX6ZyVFxQOSq+o3JU3ETlqPhI5ah4pXJUfEflqHijclRcUzkqnqgcFd9ROSpupXJUPKgcFW9UjorvqBwVr1SOimsqR8UrlaPiN6kcFddUjoo3KkfFNZWj4prKUfFG5aj4SOWoeKVyVHykclT8VSpHxRuVo+KJylHxkcpRcSuVo+KCylHxAypHxROVo+IjlaPimspR8UTlqPgBlaPiicpRccGKO6gcFRdUHiq+onJU/FUqR8U1laPimspR8UTloeIvUTkqrqkcFR+pHBWvVI6Kj1QeKh5Ujoq/SuWo+GUqR8UFlaPiOypHxU1UjoqPVI6KVypHxXdUjoo3KkfFNZWj4onKQ8VHKkfFrVSOikPloeKNylHxHZWj4pXKUXFN5ah4pXJU/CaVo+KaylHxRuWouKZyVFxTOSreqBwVH6kcFa9UjoqPVI6Kv0rlqHijclQ8UTkqPlI5Km6lclRcUHmo+I7KUfFE5aj4SOWouKZyVDxROSp+QOWoeKJyVFyw4g4qR8U1laPiKypHxV+lclRcUzkqrqkcFa9Ujoq/ROWouKZyVHykclS8UjkqPlI5Kp6oHBV/lcpR8ctUjooLKkfFd1SOipuoHBUfqRwVr1SOiu+oHBVvVI6KaypHxSuVo+IjlaPibipHBagcFV9ROSq+o3JUvFI5Kq6pHBWvVI6K36RyVFxTOSreqBwV11SOimsqR8UblaPiI5Wj4pXKUfEdlaPiL1E5Kt6oHBVPVB4qrqkcFbdSOSquqRwVH6k8VDxROSo+UjkqrqkcFa9UjoqPVI6KVypHxQUr7qByVFxTeah4o/JQ8ZFa8UblqLimclRcUzkqXqkcFd9RK16pHBXXVI6Kj1SOilcqR8VHKkfFE5WHio/UijcqR8UvUzkqLqg8VFxTeai4icpRcU3loeKVylHxHZWj4o3KUXFN5ah4pXJUfKRyVNxN5agAlaPiKyoPFddUHipeqRwVF1QeKl6pHBW/SeWouKZyVLxROSquqRwV11SOijcqR8U1lYeKVypHxXdUjorvqBWvVI6KNypHxSuVo+KaylFxK5Wj4prKUfGRylHxSuWo+EjlqLimclS8UjkqPlI5Kl6pHBUXrLiDylHxkcpDxRuVo+KaylHxSuWouKZyVFxTOSpeqTxUXFM5Kp6oHBXXVI6Kj1SOilcqR8U1lYeKVypHxTWVo+KVylHxy1SOimsqR8U1laPiPipHxTWVo+KNylHxHZWj4o3KUXFN5ah4pfJQcU3lqLibypuKaypHxTWVo+KNylHxFZUnFa9UjorfpPJQ8RWVh4o3KkfFNZWj4prKUfFG5ai4pnJUvFE5Kr6jclR8pHJUPFE5Kt6oHBWvVI6KaypHxa1UHiouqDxUXFM5Kl6pHBUfqRwV11SOilcqR8VHKkfFK5Wj4oIVd1A5Kj5Seah4o/JQ8RWVh4pXKkfFNZWj4prKUfFG5aHiKyoPFU9UjoprKkfFRypHxSuVh4qvqDxUvFF5qPiKykPFK5Wj4pepHBXXVB4qvqLyUHEflYeKr6g8VLxROSq+o3JUvFE5Kq6pHBVvVB4qvqLyUPELVF5VXFN5qPiKykPFG5Wj4o3Kq4pXKkfFL1M5Kt6oPKl4o3JUXFM5Kq6pHBVvVB4qvqLyUPFG5aj4jspDxQWVo+KVylHxRuWoeKXyUPEVlYeKu6kcFddUHiq+ovJQ8UrlqPhI5ai4pnJUvFJ5qLig8lDxSuWouGDFHVSOiu+oPFS8UXlS8UTloeKNylFxTeWouKZyVHxF5aHilcpDxSuVo+KaylHxkcpR8UrlVcUTlYeKCypPKp6oPFS8UTkqfpnKQ8U1lScVT1QeKm6l8qriicpDxVdUjorvqBwVb1SOimsqR8VXVB4qXqk8qfgFKq8qPlJ5UvFE5aHiKyoPFU9U3lS8UjkqfpnKQ8UTlVcVb1SOimsqR8U1laPijcqriicqDxVfUTkqfkDlScUTlScVr1SOijcqR8UblScVT1SeVNxN5aHimspDxSuVh4o3KkfFRypHxTWVo+KNypOKJypPKt6oHBUXrLiDylHxAypHxVdUvlPxFZWj4prKUXFN5aj4isoPVLxROSquqRwVH6kcFa9Ufqzimsp3Kr6iclT8PpU3FW9UvlNxN5WfqbigclR8R+WoeKNyVFxTOSq+ovJjFb9D5aHiB1S+U3FN5ccqXqkcFb9M5ccq3qgcFddUjoprKkfFG5WfqbigclT8jMoPVLxROSreqBwVX1H5mYq7qbypeKPyAxVfUTkqPlI5Kq6pHBVfUfmBiq+oHBUXrLiDylHxAyoPFV9R+ajiKypHxTWVo+KaylFxQeWjiq+oHBXXVI6Kj1SOilcqP1PxHZWPKr6iclT8PpWvVLxS+ajiF6j8QMU1laPiOypHxRuVo+KaylFxQeVnKn6HykPFD6h8VPGRykeVylHxSuWo+H0qP1PxRuWouKZyVFxTOSreqPxMxQWVo+LHVD6q+IrKUfFG5ai4oPIDFb9A5U3FG5WPKi6oHBUfqRwV11SOigsqH1VcUDkqLlhxE7Xix1Sg4iOVVxXfUSu+o1Z8R634AZUnFd9RK76jVvyAWvEVteJQeVXxh1ReVXxHrfgbqTypuKbyquI3qRWHyquKH1ArfkatuKBWfEet+AGVJxWHylHxO1SOij+k8qrix1ReVTxRK76iVvyNVF5VPKgVF9SK76gV31ErLqgVh8qrih9QK/6cyquK76gVF9SK76i8qjjUit+k8lDxkcqTih9QK35ArfiOWvEdlVcVP6BWXLNiZub/g8pR8QtUjoqZmb+RFTMz/x9UjopfoHJUzMz8jayYmfnbqRwVv0DlqJiZ+XtZMTPzt1M5Ku6m8lAxM/P3smJm5neoFW9UHipupfKkYmbm72XFzMwvUPlOxU1U3lTMzPztrJiZ+R0q1ypuovKmYmbm/4MVMzO/RuVNxd1UHipmZv7/WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZm
bmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPv8HPt/pW2VCUvMAAAAASUVORK5CYII="
+ }
+ }
+ ],
+ "type": "human"
+ }
+ },
+ {
+ "lc": 1,
+ "type": "constructor",
+ "id": [
+ "langchain",
+ "schema",
+ "messages",
+ "AIMessage"
+ ],
+ "kwargs": {
+ "content": "",
+ "type": "ai",
+ "tool_calls": [
+ {
+ "name": "ExtractedText",
+ "args": {
+ "text": ""
+ },
+ "id": "call_example_7",
+ "type": "tool_call"
+ }
+ ],
+ "invalid_tool_calls": []
+ }
+ },
+ {
+ "lc": 1,
+ "type": "constructor",
+ "id": [
+ "langchain",
+ "schema",
+ "messages",
+ "ToolMessage"
+ ],
+ "kwargs": {
+ "content": "",
+ "type": "tool",
+ "tool_call_id": "call_example_7",
+ "status": "success"
+ }
+ },
  {
  "lc": 1,
  "type": "constructor",
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: nl_processing
- Version: 0.3.0
+ Version: 0.4.0
  Summary: Natural language processing playground
  Requires-Python: >=3.12
  Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: langchain<1,>=0.3
  Requires-Dist: langchain-openai<1,>=0.3
  Requires-Dist: opencv-python<5,>=4.10
  Requires-Dist: asyncpg<1,>=0.30
+ Requires-Dist: aiosqlite<1,>=0.20
 
  # nl_processing
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/SOURCES.txt
@@ -12,7 +12,6 @@ nl_processing/core/models.py
  nl_processing/core/prompts.py
  nl_processing/core/scripts/prompt_author.py
  nl_processing/database/__init__.py
- nl_processing/database/cached_service.py
  nl_processing/database/exceptions.py
  nl_processing/database/exercise_progress.py
  nl_processing/database/logging.py
@@ -24,6 +23,14 @@ nl_processing/database/backend/_neon_exercise.py
  nl_processing/database/backend/_queries.py
  nl_processing/database/backend/abstract.py
  nl_processing/database/backend/neon.py
+ nl_processing/database_cache/__init__.py
+ nl_processing/database_cache/_local_store_queries.py
+ nl_processing/database_cache/exceptions.py
+ nl_processing/database_cache/local_store.py
+ nl_processing/database_cache/logging.py
+ nl_processing/database_cache/models.py
+ nl_processing/database_cache/service.py
+ nl_processing/database_cache/sync.py
  nl_processing/extract_text_from_image/__init__.py
  nl_processing/extract_text_from_image/benchmark.py
  nl_processing/extract_text_from_image/image_encoding.py
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/requires.txt
@@ -3,3 +3,4 @@ langchain<1,>=0.3
  langchain-openai<1,>=0.3
  opencv-python<5,>=4.10
  asyncpg<1,>=0.30
+ aiosqlite<1,>=0.20
{nl_processing-0.3.0 → nl_processing-0.4.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
  [project]
  name = "nl_processing"
- version = "0.3.0"
+ version = "0.4.0"
  description = "Natural language processing playground"
  readme = "README.md"
  requires-python = ">=3.12"
@@ -14,6 +14,7 @@ dependencies = [
      "langchain-openai>=0.3,<1",
      "opencv-python>=4.10,<5",
      "asyncpg>=0.30,<1",
+     "aiosqlite>=0.20,<1",
  ]
 
  [tool.setuptools.packages.find]
nl_processing-0.3.0/nl_processing/database/cached_service.py
@@ -1,82 +0,0 @@
- """CachedDatabaseService — wraps DatabaseService with in-memory LRU cache.
-
- .. deprecated::
-     Legacy prototype helper; superseded by planned database_cache module.
- """
-
- from nl_processing.core.models import Language, PartOfSpeech, Word
- from nl_processing.database.models import AddWordsResult, WordPair
- from nl_processing.database.service import DatabaseService
-
-
- class CachedDatabaseService:
-     """Wraps DatabaseService with an in-memory LRU cache for get_words.
-
-     .. deprecated::
-         Legacy prototype helper; superseded by planned database_cache module.
-     """
-
-     def __init__(
-         self,
-         *,
-         user_id: str,
-         source_language: Language = Language.NL,
-         target_language: Language = Language.RU,
-         cache_max_size: int = 128,
-     ) -> None:
-         self._inner = DatabaseService(
-             user_id=user_id,
-             source_language=source_language,
-             target_language=target_language,
-         )
-         self._cache: dict[tuple[str | None, int | None, bool], list[WordPair]] = {}
-         self._cache_max_size = cache_max_size
-         self._cache_order: list[tuple[str | None, int | None, bool]] = []
-
-     async def add_words(self, words: list[Word]) -> AddWordsResult:
-         """Delegate to inner service and clear the cache."""
-         result = await self._inner.add_words(words)
-         self._cache.clear()
-         self._cache_order.clear()
-         return result
-
-     async def get_words(
-         self,
-         *,
-         word_type: PartOfSpeech | None = None,
-         limit: int | None = None,
-         random: bool = False,
-     ) -> list[WordPair]:
-         """Return word pairs, serving from cache when possible.
-
-         Random queries and zero-size caches bypass the cache entirely.
-         """
-         if random or self._cache_max_size <= 0:
-             return await self._inner.get_words(
-                 word_type=word_type,
-                 limit=limit,
-                 random=random,
-             )
-
-         key = (word_type.value if word_type else None, limit, False)
-         if key in self._cache:
-             self._cache_order.remove(key)
-             self._cache_order.append(key)
-             return self._cache[key]
-
-         result = await self._inner.get_words(
-             word_type=word_type,
-             limit=limit,
-             random=random,
-         )
-         self._cache[key] = result
-         self._cache_order.append(key)
-         while len(self._cache_order) > self._cache_max_size:
-             oldest = self._cache_order.pop(0)
-             self._cache.pop(oldest, None)
-         return result
-
-     @classmethod
-     async def create_tables(cls) -> None:
-         """Delegate to DatabaseService.create_tables."""
-         await DatabaseService.create_tables()