nl-processing 0.3.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nl_processing-0.3.0 → nl_processing-0.5.0}/PKG-INFO +2 -1
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/_neon_exercise.py +27 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/_queries.py +13 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/abstract.py +25 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/neon.py +33 -41
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/exercise_progress.py +12 -16
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/service.py +13 -0
- nl_processing-0.5.0/nl_processing/database_cache/_local_store_queries.py +58 -0
- nl_processing-0.5.0/nl_processing/database_cache/exceptions.py +10 -0
- nl_processing-0.5.0/nl_processing/database_cache/local_store.py +200 -0
- nl_processing-0.5.0/nl_processing/database_cache/logging.py +5 -0
- nl_processing-0.5.0/nl_processing/database_cache/models.py +12 -0
- nl_processing-0.5.0/nl_processing/database_cache/service.py +185 -0
- nl_processing-0.5.0/nl_processing/database_cache/sync.py +82 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py +9 -2
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/prompts/nl.json +62 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/sampling/service.py +18 -6
- nl_processing-0.5.0/nl_processing/translate_word/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/PKG-INFO +2 -1
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/SOURCES.txt +8 -1
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/requires.txt +1 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/pyproject.toml +2 -1
- nl_processing-0.3.0/nl_processing/database/cached_service.py +0 -82
- {nl_processing-0.3.0 → nl_processing-0.5.0}/README.md +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/exceptions.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/models.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/prompts.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/core/scripts/prompt_author.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/exceptions.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/logging.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/models.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/testing.py +0 -0
- {nl_processing-0.3.0/nl_processing/extract_words_from_text → nl_processing-0.5.0/nl_processing/database_cache}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/benchmark.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/image_encoding.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_text_from_image/service.py +0 -0
- {nl_processing-0.3.0/nl_processing/sampling → nl_processing-0.5.0/nl_processing/extract_words_from_text}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_words_from_text/prompts/nl.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/extract_words_from_text/service.py +0 -0
- {nl_processing-0.3.0/nl_processing/translate_text → nl_processing-0.5.0/nl_processing/sampling}/__init__.py +0 -0
- {nl_processing-0.3.0/nl_processing/translate_word → nl_processing-0.5.0/nl_processing/translate_text}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_text/prompts/nl_ru.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_text/service.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_word/prompts/nl_ru.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/translate_word/service.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/dependency_links.txt +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing.egg-info/top_level.txt +0 -0
- {nl_processing-0.3.0 → nl_processing-0.5.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nl_processing
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Natural language processing playground
|
|
5
5
|
Requires-Python: >=3.12
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -9,6 +9,7 @@ Requires-Dist: langchain<1,>=0.3
|
|
|
9
9
|
Requires-Dist: langchain-openai<1,>=0.3
|
|
10
10
|
Requires-Dist: opencv-python<5,>=4.10
|
|
11
11
|
Requires-Dist: asyncpg<1,>=0.30
|
|
12
|
+
Requires-Dist: aiosqlite<1,>=0.20
|
|
12
13
|
|
|
13
14
|
# nl_processing
|
|
14
15
|
|
{nl_processing-0.3.0 → nl_processing-0.5.0}/nl_processing/database/backend/_neon_exercise.py
RENAMED
|
@@ -90,3 +90,30 @@ async def mark_event(
|
|
|
90
90
|
await conn.execute(mark_event_applied_query(table), event_id)
|
|
91
91
|
except asyncpg.PostgresError as exc:
|
|
92
92
|
raise DatabaseError(str(exc)) from exc
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def atomic_apply_delta(
|
|
96
|
+
conn: asyncpg.Connection, # type: ignore[type-arg]
|
|
97
|
+
score_table: str,
|
|
98
|
+
events_table: str,
|
|
99
|
+
user_id: str,
|
|
100
|
+
event_id: str,
|
|
101
|
+
source_word_id: int,
|
|
102
|
+
delta: int,
|
|
103
|
+
) -> bool:
|
|
104
|
+
"""Atomically check-apply-mark a score delta in one transaction."""
|
|
105
|
+
try:
|
|
106
|
+
async with conn.transaction():
|
|
107
|
+
already = await conn.fetchrow(check_event_applied_query(events_table), event_id)
|
|
108
|
+
if already is not None:
|
|
109
|
+
return False
|
|
110
|
+
await conn.fetchrow(
|
|
111
|
+
increment_score_query(score_table),
|
|
112
|
+
user_id,
|
|
113
|
+
source_word_id,
|
|
114
|
+
delta,
|
|
115
|
+
)
|
|
116
|
+
await conn.execute(mark_event_applied_query(events_table), event_id)
|
|
117
|
+
return True
|
|
118
|
+
except asyncpg.PostgresError as exc:
|
|
119
|
+
raise DatabaseError(str(exc)) from exc
|
|
@@ -133,6 +133,19 @@ def get_user_words_query(
|
|
|
133
133
|
return query
|
|
134
134
|
|
|
135
135
|
|
|
136
|
+
def count_user_words_query(language: str, word_type: str | None) -> str:
|
|
137
|
+
# Table name from Language enum value, not user input # noqa: S608
|
|
138
|
+
query = f"""
|
|
139
|
+
SELECT COUNT(*) AS cnt
|
|
140
|
+
FROM user_words uw
|
|
141
|
+
JOIN words_{language} w ON uw.word_id = w.id
|
|
142
|
+
WHERE uw.user_id = $1 AND uw.language = $2
|
|
143
|
+
""" # noqa: S608
|
|
144
|
+
if word_type is not None:
|
|
145
|
+
query += " AND w.word_type = $3"
|
|
146
|
+
return query
|
|
147
|
+
|
|
148
|
+
|
|
136
149
|
def increment_score_query(table: str) -> str:
|
|
137
150
|
# Table name from Language enum values, not user input # noqa: S608
|
|
138
151
|
return f"""
|
|
@@ -53,6 +53,15 @@ class AbstractBackend(ABC):
|
|
|
53
53
|
and random ordering.
|
|
54
54
|
"""
|
|
55
55
|
|
|
56
|
+
@abstractmethod
|
|
57
|
+
async def count_user_words(
|
|
58
|
+
self,
|
|
59
|
+
user_id: str,
|
|
60
|
+
language: str,
|
|
61
|
+
word_type: str | None = None,
|
|
62
|
+
) -> int:
|
|
63
|
+
"""Return total user-word associations for the given user and language."""
|
|
64
|
+
|
|
56
65
|
@abstractmethod
|
|
57
66
|
async def add_user_word(
|
|
58
67
|
self,
|
|
@@ -103,6 +112,22 @@ class AbstractBackend(ABC):
|
|
|
103
112
|
) -> None:
|
|
104
113
|
"""Insert event_id into the applied_events table."""
|
|
105
114
|
|
|
115
|
+
@abstractmethod
|
|
116
|
+
async def apply_score_delta_atomic(
|
|
117
|
+
self,
|
|
118
|
+
score_table: str,
|
|
119
|
+
events_table: str,
|
|
120
|
+
user_id: str,
|
|
121
|
+
event_id: str,
|
|
122
|
+
source_word_id: int,
|
|
123
|
+
delta: int,
|
|
124
|
+
) -> bool:
|
|
125
|
+
"""Atomically check-apply-mark a score delta in one transaction.
|
|
126
|
+
|
|
127
|
+
Returns True if the delta was applied, False if event_id was already applied.
|
|
128
|
+
The entire operation (check + increment + mark) runs in a single transaction.
|
|
129
|
+
"""
|
|
130
|
+
|
|
106
131
|
@abstractmethod
|
|
107
132
|
async def create_tables(
|
|
108
133
|
self,
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
"""NeonBackend
|
|
1
|
+
"""NeonBackend asyncpg implementation for Neon PostgreSQL."""
|
|
2
2
|
|
|
3
3
|
import asyncpg
|
|
4
4
|
|
|
5
5
|
from nl_processing.database.backend._neon_exercise import (
|
|
6
|
+
atomic_apply_delta,
|
|
6
7
|
check_event,
|
|
7
8
|
create_exercise_tables,
|
|
8
9
|
get_scores,
|
|
@@ -14,6 +15,7 @@ from nl_processing.database.backend._queries import (
|
|
|
14
15
|
CREATE_USER_WORDS,
|
|
15
16
|
add_translation_link_query,
|
|
16
17
|
add_word_query,
|
|
18
|
+
count_user_words_query,
|
|
17
19
|
create_translations_table,
|
|
18
20
|
create_words_table,
|
|
19
21
|
get_user_words_query,
|
|
@@ -34,7 +36,6 @@ class NeonBackend(AbstractBackend):
|
|
|
34
36
|
self._connection: asyncpg.Connection | None = None # type: ignore[type-arg]
|
|
35
37
|
|
|
36
38
|
async def _connect(self) -> asyncpg.Connection: # type: ignore[type-arg]
|
|
37
|
-
"""Return cached connection, creating it lazily on first call."""
|
|
38
39
|
if self._connection is None:
|
|
39
40
|
try:
|
|
40
41
|
self._connection = await asyncpg.connect(dsn=self._database_url)
|
|
@@ -43,13 +44,12 @@ class NeonBackend(AbstractBackend):
|
|
|
43
44
|
raise DatabaseError(str(exc)) from exc
|
|
44
45
|
except OSError as exc:
|
|
45
46
|
raise DatabaseError(str(exc)) from exc
|
|
47
|
+
if self._connection is None:
|
|
48
|
+
raise DatabaseError("Database connection was not initialized")
|
|
46
49
|
return self._connection
|
|
47
50
|
|
|
48
51
|
async def create_tables(
|
|
49
|
-
self,
|
|
50
|
-
languages: list[str],
|
|
51
|
-
pairs: list[tuple[str, str]],
|
|
52
|
-
exercise_slugs: list[str],
|
|
52
|
+
self, languages: list[str], pairs: list[tuple[str, str]], exercise_slugs: list[str]
|
|
53
53
|
) -> None:
|
|
54
54
|
conn = await self._connect()
|
|
55
55
|
try:
|
|
@@ -78,11 +78,7 @@ class NeonBackend(AbstractBackend):
|
|
|
78
78
|
return None
|
|
79
79
|
return int(row["id"])
|
|
80
80
|
|
|
81
|
-
async def get_word(
|
|
82
|
-
self,
|
|
83
|
-
table: str,
|
|
84
|
-
normalized_form: str,
|
|
85
|
-
) -> dict[str, str | int] | None:
|
|
81
|
+
async def get_word(self, table: str, normalized_form: str) -> dict[str, str | int] | None:
|
|
86
82
|
conn = await self._connect()
|
|
87
83
|
try:
|
|
88
84
|
row = await conn.fetchrow(get_word_query(table), normalized_form)
|
|
@@ -96,24 +92,14 @@ class NeonBackend(AbstractBackend):
|
|
|
96
92
|
"word_type": row["word_type"],
|
|
97
93
|
}
|
|
98
94
|
|
|
99
|
-
async def add_translation_link(
|
|
100
|
-
self,
|
|
101
|
-
table: str,
|
|
102
|
-
source_id: int,
|
|
103
|
-
target_id: int,
|
|
104
|
-
) -> None:
|
|
95
|
+
async def add_translation_link(self, table: str, source_id: int, target_id: int) -> None:
|
|
105
96
|
conn = await self._connect()
|
|
106
97
|
try:
|
|
107
98
|
await conn.execute(add_translation_link_query(table), source_id, target_id)
|
|
108
99
|
except asyncpg.PostgresError as exc:
|
|
109
100
|
raise DatabaseError(str(exc)) from exc
|
|
110
101
|
|
|
111
|
-
async def add_user_word(
|
|
112
|
-
self,
|
|
113
|
-
user_id: str,
|
|
114
|
-
word_id: int,
|
|
115
|
-
language: str,
|
|
116
|
-
) -> None:
|
|
102
|
+
async def add_user_word(self, user_id: str, word_id: int, language: str) -> None:
|
|
117
103
|
conn = await self._connect()
|
|
118
104
|
try:
|
|
119
105
|
await conn.execute(ADD_USER_WORD, user_id, word_id, language)
|
|
@@ -144,6 +130,19 @@ class NeonBackend(AbstractBackend):
|
|
|
144
130
|
raise DatabaseError(str(exc)) from exc
|
|
145
131
|
return [dict(row) for row in rows]
|
|
146
132
|
|
|
133
|
+
async def count_user_words(self, user_id: str, language: str, word_type: str | None = None) -> int:
|
|
134
|
+
conn = await self._connect()
|
|
135
|
+
args: list[str] = [user_id, language]
|
|
136
|
+
if word_type is not None:
|
|
137
|
+
args.append(word_type)
|
|
138
|
+
try:
|
|
139
|
+
count = await conn.fetchval(count_user_words_query(language, word_type), *args)
|
|
140
|
+
except asyncpg.PostgresError as exc:
|
|
141
|
+
raise DatabaseError(str(exc)) from exc
|
|
142
|
+
if count is None:
|
|
143
|
+
return 0
|
|
144
|
+
return int(count)
|
|
145
|
+
|
|
147
146
|
async def increment_user_exercise_score(
|
|
148
147
|
self,
|
|
149
148
|
table: str,
|
|
@@ -155,36 +154,29 @@ class NeonBackend(AbstractBackend):
|
|
|
155
154
|
return await increment_score(conn, table, user_id, source_word_id, delta)
|
|
156
155
|
|
|
157
156
|
async def get_user_exercise_scores(
|
|
158
|
-
self,
|
|
159
|
-
table: str,
|
|
160
|
-
user_id: str,
|
|
161
|
-
source_word_ids: list[int],
|
|
157
|
+
self, table: str, user_id: str, source_word_ids: list[int]
|
|
162
158
|
) -> list[dict[str, str | int]]:
|
|
163
159
|
conn = await self._connect()
|
|
164
160
|
return await get_scores(conn, table, user_id, source_word_ids)
|
|
165
161
|
|
|
166
|
-
async def check_event_applied(
|
|
167
|
-
self,
|
|
168
|
-
table: str,
|
|
169
|
-
event_id: str,
|
|
170
|
-
) -> bool:
|
|
162
|
+
async def check_event_applied(self, table: str, event_id: str) -> bool:
|
|
171
163
|
conn = await self._connect()
|
|
172
164
|
return await check_event(conn, table, event_id)
|
|
173
165
|
|
|
174
|
-
async def mark_event_applied(
|
|
175
|
-
self,
|
|
176
|
-
table: str,
|
|
177
|
-
event_id: str,
|
|
178
|
-
) -> None:
|
|
166
|
+
async def mark_event_applied(self, table: str, event_id: str) -> None:
|
|
179
167
|
conn = await self._connect()
|
|
180
168
|
await mark_event(conn, table, event_id)
|
|
181
169
|
|
|
170
|
+
async def apply_score_delta_atomic(
|
|
171
|
+
self, score_table: str, events_table: str,
|
|
172
|
+
user_id: str, event_id: str, source_word_id: int, delta: int,
|
|
173
|
+
) -> bool: # fmt: skip
|
|
174
|
+
conn = await self._connect()
|
|
175
|
+
return await atomic_apply_delta(conn, score_table, events_table, user_id, event_id, source_word_id, delta)
|
|
182
176
|
|
|
183
|
-
def _infer_target_language(source_language: str) -> str:
|
|
184
|
-
"""Infer the target language for translation lookups.
|
|
185
177
|
|
|
186
|
-
|
|
187
|
-
"""
|
|
178
|
+
def _infer_target_language(source_language: str) -> str:
|
|
179
|
+
"""Infer the other language in the nl/ru pair."""
|
|
188
180
|
if source_language == "nl":
|
|
189
181
|
return "ru"
|
|
190
182
|
return "nl"
|
|
@@ -119,25 +119,21 @@ class ExerciseProgressStore:
|
|
|
119
119
|
) -> None:
|
|
120
120
|
"""Apply a score delta idempotently, guarded by event deduplication.
|
|
121
121
|
|
|
122
|
-
Validates exercise_type. Skips if event_id was already applied.
|
|
122
|
+
Validates exercise_type and delta. Skips if event_id was already applied.
|
|
123
|
+
The check-increment-mark operation is atomic (single transaction).
|
|
123
124
|
"""
|
|
124
125
|
self._validate_exercise_type(exercise_type)
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
)
|
|
129
|
-
if already_applied:
|
|
130
|
-
return
|
|
126
|
+
if delta not in (1, -1):
|
|
127
|
+
msg = f"delta must be +1 or -1, got {delta}"
|
|
128
|
+
raise ValueError(msg)
|
|
131
129
|
table = self._score_tables[exercise_type]
|
|
132
|
-
await self._backend.increment_user_exercise_score(
|
|
133
|
-
table,
|
|
134
|
-
self._user_id,
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
self._applied_events_table,
|
|
140
|
-
event_id,
|
|
130
|
+
await self._backend.apply_score_delta_atomic(
|
|
131
|
+
score_table=table,
|
|
132
|
+
events_table=self._applied_events_table,
|
|
133
|
+
user_id=self._user_id,
|
|
134
|
+
event_id=event_id,
|
|
135
|
+
source_word_id=source_word_id,
|
|
136
|
+
delta=delta,
|
|
141
137
|
)
|
|
142
138
|
|
|
143
139
|
def _validate_exercise_type(self, exercise_type: str) -> None:
|
|
@@ -140,6 +140,19 @@ class DatabaseService:
|
|
|
140
140
|
language=self._target_language,
|
|
141
141
|
)
|
|
142
142
|
pairs.append(WordPair(source=source, target=target))
|
|
143
|
+
if limit is None and not random:
|
|
144
|
+
total_count = await self._backend.count_user_words(
|
|
145
|
+
self._user_id,
|
|
146
|
+
self._source_language.value,
|
|
147
|
+
word_type=word_type.value if word_type else None,
|
|
148
|
+
)
|
|
149
|
+
if total_count > len(pairs):
|
|
150
|
+
excluded_count = total_count - len(pairs)
|
|
151
|
+
_logger.warning(
|
|
152
|
+
"%d of %d words excluded from get_words() due to missing translations",
|
|
153
|
+
excluded_count,
|
|
154
|
+
total_count,
|
|
155
|
+
)
|
|
143
156
|
return pairs
|
|
144
157
|
|
|
145
158
|
@classmethod
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""DDL and query constants for the local SQLite cache store."""
|
|
2
|
+
|
|
3
|
+
DDL_CACHED_WORD_PAIRS = """
|
|
4
|
+
CREATE TABLE IF NOT EXISTS cached_word_pairs (
|
|
5
|
+
source_word_id INTEGER PRIMARY KEY,
|
|
6
|
+
source_normalized_form TEXT NOT NULL,
|
|
7
|
+
source_word_type TEXT NOT NULL,
|
|
8
|
+
target_word_id INTEGER NOT NULL,
|
|
9
|
+
target_normalized_form TEXT NOT NULL,
|
|
10
|
+
target_word_type TEXT NOT NULL
|
|
11
|
+
)"""
|
|
12
|
+
|
|
13
|
+
DDL_CACHED_SCORES = """
|
|
14
|
+
CREATE TABLE IF NOT EXISTS cached_scores (
|
|
15
|
+
source_word_id INTEGER NOT NULL,
|
|
16
|
+
exercise_type TEXT NOT NULL,
|
|
17
|
+
score INTEGER NOT NULL DEFAULT 0,
|
|
18
|
+
updated_at TEXT NOT NULL,
|
|
19
|
+
PRIMARY KEY (source_word_id, exercise_type)
|
|
20
|
+
)"""
|
|
21
|
+
|
|
22
|
+
DDL_PENDING_SCORE_EVENTS = """
|
|
23
|
+
CREATE TABLE IF NOT EXISTS pending_score_events (
|
|
24
|
+
event_id TEXT PRIMARY KEY,
|
|
25
|
+
source_word_id INTEGER NOT NULL,
|
|
26
|
+
exercise_type TEXT NOT NULL,
|
|
27
|
+
delta INTEGER NOT NULL,
|
|
28
|
+
created_at TEXT NOT NULL,
|
|
29
|
+
flushed_at TEXT,
|
|
30
|
+
last_error TEXT
|
|
31
|
+
)"""
|
|
32
|
+
|
|
33
|
+
DDL_CACHE_METADATA = """
|
|
34
|
+
CREATE TABLE IF NOT EXISTS cache_metadata (
|
|
35
|
+
id INTEGER PRIMARY KEY DEFAULT 1,
|
|
36
|
+
exercise_types TEXT NOT NULL,
|
|
37
|
+
schema_version INTEGER NOT NULL DEFAULT 1,
|
|
38
|
+
last_refresh_started_at TEXT,
|
|
39
|
+
last_refresh_completed_at TEXT,
|
|
40
|
+
last_flush_completed_at TEXT,
|
|
41
|
+
last_error TEXT
|
|
42
|
+
)"""
|
|
43
|
+
|
|
44
|
+
ALL_DDL = [DDL_CACHED_WORD_PAIRS, DDL_CACHED_SCORES, DDL_PENDING_SCORE_EVENTS, DDL_CACHE_METADATA]
|
|
45
|
+
|
|
46
|
+
UPSERT_SCORE = (
|
|
47
|
+
"INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
|
|
48
|
+
" ON CONFLICT(source_word_id, exercise_type) DO UPDATE SET score = score + ?, updated_at = ?"
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
INSERT_PENDING_EVENT = (
|
|
52
|
+
"INSERT INTO pending_score_events (event_id, source_word_id, exercise_type, delta, created_at)"
|
|
53
|
+
" VALUES (?, ?, ?, ?, ?)"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
INSERT_WORD_PAIR = "INSERT INTO cached_word_pairs VALUES (?, ?, ?, ?, ?, ?)"
|
|
57
|
+
|
|
58
|
+
INSERT_SCORE = "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
class CacheNotReadyError(Exception):
|
|
2
|
+
"""Raised when cached data is requested before the first usable snapshot exists."""
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class CacheStorageError(Exception):
|
|
6
|
+
"""Raised when the local SQLite cache file cannot be opened, read, or updated."""
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class CacheSyncError(Exception):
|
|
10
|
+
"""Raised when an explicit refresh or flush operation fails synchronously."""
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""SQLite data-access layer for the local word-pair / score cache."""
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime
|
|
4
|
+
import json
|
|
5
|
+
import sqlite3
|
|
6
|
+
|
|
7
|
+
import aiosqlite
|
|
8
|
+
|
|
9
|
+
from nl_processing.database_cache._local_store_queries import (
|
|
10
|
+
ALL_DDL,
|
|
11
|
+
INSERT_PENDING_EVENT,
|
|
12
|
+
INSERT_SCORE,
|
|
13
|
+
INSERT_WORD_PAIR,
|
|
14
|
+
UPSERT_SCORE,
|
|
15
|
+
)
|
|
16
|
+
from nl_processing.database_cache.exceptions import CacheStorageError
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _now() -> str:
|
|
20
|
+
return datetime.now(tz=UTC).isoformat()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LocalStore:
|
|
24
|
+
"""Async SQLite store for cached word pairs, scores, and pending events."""
|
|
25
|
+
|
|
26
|
+
def __init__(self, db_path: str) -> None:
|
|
27
|
+
self._db_path = db_path
|
|
28
|
+
self._db: aiosqlite.Connection | None = None
|
|
29
|
+
|
|
30
|
+
@property
|
|
31
|
+
def _conn(self) -> aiosqlite.Connection:
|
|
32
|
+
if self._db is None:
|
|
33
|
+
raise CacheStorageError("LocalStore is not open")
|
|
34
|
+
return self._db
|
|
35
|
+
|
|
36
|
+
async def open(self) -> None:
|
|
37
|
+
"""Open the SQLite connection and create tables."""
|
|
38
|
+
try:
|
|
39
|
+
self._db = await aiosqlite.connect(self._db_path)
|
|
40
|
+
self._db.row_factory = aiosqlite.Row
|
|
41
|
+
await self._db.execute("PRAGMA journal_mode=WAL")
|
|
42
|
+
for ddl in ALL_DDL:
|
|
43
|
+
await self._db.execute(ddl)
|
|
44
|
+
await self._db.commit()
|
|
45
|
+
except sqlite3.Error as exc:
|
|
46
|
+
raise CacheStorageError(str(exc)) from exc
|
|
47
|
+
|
|
48
|
+
async def close(self) -> None:
|
|
49
|
+
"""Close the SQLite connection."""
|
|
50
|
+
if self._db:
|
|
51
|
+
await self._db.close()
|
|
52
|
+
self._db = None
|
|
53
|
+
|
|
54
|
+
async def get_cached_word_pairs(
|
|
55
|
+
self,
|
|
56
|
+
word_type: str | None = None,
|
|
57
|
+
limit: int | None = None,
|
|
58
|
+
*,
|
|
59
|
+
random: bool = False,
|
|
60
|
+
) -> list[dict[str, str | int]]:
|
|
61
|
+
"""Query cached word pairs with optional filter, limit, and random ordering."""
|
|
62
|
+
sql = "SELECT * FROM cached_word_pairs"
|
|
63
|
+
params: list[str | int] = []
|
|
64
|
+
if word_type is not None:
|
|
65
|
+
sql += " WHERE source_word_type = ?"
|
|
66
|
+
params.append(word_type)
|
|
67
|
+
if random:
|
|
68
|
+
sql += " ORDER BY RANDOM()"
|
|
69
|
+
if limit is not None:
|
|
70
|
+
sql += " LIMIT ?"
|
|
71
|
+
params.append(limit)
|
|
72
|
+
return await self._fetch_all(sql, params)
|
|
73
|
+
|
|
74
|
+
async def get_cached_word_pairs_with_scores(self, exercise_types: list[str]) -> list[dict[str, str | int]]:
|
|
75
|
+
"""Query word pairs and attach scores per exercise type (missing = 0)."""
|
|
76
|
+
try:
|
|
77
|
+
rows = await self._fetch_all("SELECT * FROM cached_word_pairs")
|
|
78
|
+
for row in rows:
|
|
79
|
+
for et in exercise_types:
|
|
80
|
+
sc = await self._conn.execute(
|
|
81
|
+
"SELECT score FROM cached_scores WHERE source_word_id=? AND exercise_type=?",
|
|
82
|
+
(row["source_word_id"], et),
|
|
83
|
+
)
|
|
84
|
+
score_row = await sc.fetchone()
|
|
85
|
+
row[f"score_{et}"] = int(score_row["score"]) if score_row else 0
|
|
86
|
+
return rows
|
|
87
|
+
except sqlite3.Error as exc:
|
|
88
|
+
raise CacheStorageError(str(exc)) from exc
|
|
89
|
+
|
|
90
|
+
async def get_pending_events(self) -> list[dict[str, str | int]]:
|
|
91
|
+
return await self._fetch_all("SELECT * FROM pending_score_events WHERE flushed_at IS NULL ORDER BY created_at")
|
|
92
|
+
|
|
93
|
+
async def get_pending_event_count(self) -> int:
|
|
94
|
+
try:
|
|
95
|
+
cur = await self._conn.execute("SELECT COUNT(*) FROM pending_score_events WHERE flushed_at IS NULL")
|
|
96
|
+
row = await cur.fetchone()
|
|
97
|
+
return int(row[0]) if row else 0
|
|
98
|
+
except sqlite3.Error as exc:
|
|
99
|
+
raise CacheStorageError(str(exc)) from exc
|
|
100
|
+
|
|
101
|
+
async def get_metadata(self) -> dict[str, str | int] | None:
|
|
102
|
+
try:
|
|
103
|
+
cur = await self._conn.execute("SELECT * FROM cache_metadata WHERE id = 1")
|
|
104
|
+
row = await cur.fetchone()
|
|
105
|
+
return dict(row) if row else None
|
|
106
|
+
except sqlite3.Error as exc:
|
|
107
|
+
raise CacheStorageError(str(exc)) from exc
|
|
108
|
+
|
|
109
|
+
async def has_snapshot(self) -> bool:
|
|
110
|
+
try:
|
|
111
|
+
cur = await self._conn.execute("SELECT 1 FROM cached_word_pairs LIMIT 1")
|
|
112
|
+
return (await cur.fetchone()) is not None
|
|
113
|
+
except sqlite3.Error as exc:
|
|
114
|
+
raise CacheStorageError(str(exc)) from exc
|
|
115
|
+
|
|
116
|
+
async def record_score_and_event(
|
|
117
|
+
self,
|
|
118
|
+
source_word_id: int,
|
|
119
|
+
exercise_type: str,
|
|
120
|
+
delta: int,
|
|
121
|
+
event_id: str,
|
|
122
|
+
) -> None:
|
|
123
|
+
"""Atomically upsert a cached score and insert a pending event."""
|
|
124
|
+
now = _now()
|
|
125
|
+
try:
|
|
126
|
+
await self._conn.execute(UPSERT_SCORE, (source_word_id, exercise_type, delta, now, delta, now))
|
|
127
|
+
await self._conn.execute(INSERT_PENDING_EVENT, (event_id, source_word_id, exercise_type, delta, now))
|
|
128
|
+
await self._conn.commit()
|
|
129
|
+
except sqlite3.Error as exc:
|
|
130
|
+
raise CacheStorageError(str(exc)) from exc
|
|
131
|
+
|
|
132
|
+
async def rebuild_snapshot(
|
|
133
|
+
self,
|
|
134
|
+
word_pairs: list[tuple[int, str, str, int, str, str]],
|
|
135
|
+
scores: dict[tuple[int, str], int],
|
|
136
|
+
) -> None:
|
|
137
|
+
"""Atomically replace cached word pairs and scores, then reapply pending events."""
|
|
138
|
+
now = _now()
|
|
139
|
+
try:
|
|
140
|
+
await self._conn.execute("DELETE FROM cached_word_pairs")
|
|
141
|
+
await self._conn.execute("DELETE FROM cached_scores")
|
|
142
|
+
for wp in word_pairs:
|
|
143
|
+
await self._conn.execute(INSERT_WORD_PAIR, wp)
|
|
144
|
+
for (wid, et), score in scores.items():
|
|
145
|
+
await self._conn.execute(INSERT_SCORE, (wid, et, score, now))
|
|
146
|
+
for evt in await self.get_pending_events():
|
|
147
|
+
await self._conn.execute(
|
|
148
|
+
UPSERT_SCORE,
|
|
149
|
+
(evt["source_word_id"], evt["exercise_type"], evt["delta"], now, evt["delta"], now),
|
|
150
|
+
)
|
|
151
|
+
await self._conn.commit()
|
|
152
|
+
except sqlite3.Error as exc:
|
|
153
|
+
raise CacheStorageError(str(exc)) from exc
|
|
154
|
+
|
|
155
|
+
async def mark_event_flushed(self, event_id: str) -> None:
|
|
156
|
+
await self._exec_commit("UPDATE pending_score_events SET flushed_at=? WHERE event_id=?", (_now(), event_id))
|
|
157
|
+
|
|
158
|
+
async def mark_event_failed(self, event_id: str, error: str) -> None:
|
|
159
|
+
await self._exec_commit("UPDATE pending_score_events SET last_error=? WHERE event_id=?", (error, event_id))
|
|
160
|
+
|
|
161
|
+
async def update_metadata(self, **fields: str | int | None) -> None:
|
|
162
|
+
if not fields:
|
|
163
|
+
return
|
|
164
|
+
set_clause = ", ".join(f"{k} = ?" for k in fields)
|
|
165
|
+
await self._exec_commit(
|
|
166
|
+
f"UPDATE cache_metadata SET {set_clause} WHERE id = 1", # noqa: S608
|
|
167
|
+
tuple(fields.values()),
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
async def ensure_metadata(self, exercise_types: list[str]) -> None:
|
|
171
|
+
await self._exec_commit(
|
|
172
|
+
"INSERT OR REPLACE INTO cache_metadata (id, exercise_types, schema_version) VALUES (1, ?, 1)",
|
|
173
|
+
(json.dumps(exercise_types),),
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
async def get_source_word_id(self, normalized_form: str, word_type: str) -> int | None:
|
|
177
|
+
"""Look up a source_word_id from cached_word_pairs."""
|
|
178
|
+
try:
|
|
179
|
+
cur = await self._conn.execute(
|
|
180
|
+
"SELECT source_word_id FROM cached_word_pairs WHERE source_normalized_form=? AND source_word_type=?",
|
|
181
|
+
(normalized_form, word_type),
|
|
182
|
+
)
|
|
183
|
+
row = await cur.fetchone()
|
|
184
|
+
return int(row["source_word_id"]) if row else None
|
|
185
|
+
except sqlite3.Error as exc:
|
|
186
|
+
raise CacheStorageError(str(exc)) from exc
|
|
187
|
+
|
|
188
|
+
async def _fetch_all(self, sql: str, params: list[str | int] | None = None) -> list[dict[str, str | int]]:
|
|
189
|
+
try:
|
|
190
|
+
cur = await self._conn.execute(sql, params or [])
|
|
191
|
+
return [dict(row) for row in await cur.fetchall()]
|
|
192
|
+
except sqlite3.Error as exc:
|
|
193
|
+
raise CacheStorageError(str(exc)) from exc
|
|
194
|
+
|
|
195
|
+
async def _exec_commit(self, sql: str, params: tuple[str | int | None, ...]) -> None:
|
|
196
|
+
try:
|
|
197
|
+
await self._conn.execute(sql, params)
|
|
198
|
+
await self._conn.commit()
|
|
199
|
+
except sqlite3.Error as exc:
|
|
200
|
+
raise CacheStorageError(str(exc)) from exc
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class CacheStatus(BaseModel):
|
|
7
|
+
is_ready: bool
|
|
8
|
+
is_stale: bool
|
|
9
|
+
has_snapshot: bool
|
|
10
|
+
pending_events: int
|
|
11
|
+
last_refresh_completed_at: datetime | None
|
|
12
|
+
last_flush_completed_at: datetime | None
|