nl-processing 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nl_processing-0.3.0 → nl_processing-0.4.0}/PKG-INFO +2 -1
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_neon_exercise.py +27 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_queries.py +13 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/abstract.py +25 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/neon.py +33 -41
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exercise_progress.py +12 -16
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/service.py +13 -0
- nl_processing-0.4.0/nl_processing/database_cache/_local_store_queries.py +58 -0
- nl_processing-0.4.0/nl_processing/database_cache/exceptions.py +10 -0
- nl_processing-0.4.0/nl_processing/database_cache/local_store.py +200 -0
- nl_processing-0.4.0/nl_processing/database_cache/logging.py +5 -0
- nl_processing-0.4.0/nl_processing/database_cache/models.py +12 -0
- nl_processing-0.4.0/nl_processing/database_cache/service.py +185 -0
- nl_processing-0.4.0/nl_processing/database_cache/sync.py +82 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py +9 -2
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/nl.json +62 -0
- nl_processing-0.4.0/nl_processing/translate_word/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/PKG-INFO +2 -1
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/SOURCES.txt +8 -1
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/requires.txt +1 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/pyproject.toml +2 -1
- nl_processing-0.3.0/nl_processing/database/cached_service.py +0 -82
- {nl_processing-0.3.0 → nl_processing-0.4.0}/README.md +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/exceptions.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/models.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/prompts.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/scripts/prompt_author.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exceptions.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/logging.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/models.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/testing.py +0 -0
- {nl_processing-0.3.0/nl_processing/extract_words_from_text → nl_processing-0.4.0/nl_processing/database_cache}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/benchmark.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/image_encoding.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/service.py +0 -0
- {nl_processing-0.3.0/nl_processing/sampling → nl_processing-0.4.0/nl_processing/extract_words_from_text}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/prompts/nl.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/service.py +0 -0
- {nl_processing-0.3.0/nl_processing/translate_text → nl_processing-0.4.0/nl_processing/sampling}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/sampling/service.py +0 -0
- {nl_processing-0.3.0/nl_processing/translate_word → nl_processing-0.4.0/nl_processing/translate_text}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/prompts/nl_ru.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/service.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/prompts/nl_ru.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/service.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/dependency_links.txt +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/top_level.txt +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/setup.cfg +0 -0
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nl_processing
-Version: 0.3.0
+Version: 0.4.0
 Summary: Natural language processing playground
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: langchain<1,>=0.3
 Requires-Dist: langchain-openai<1,>=0.3
 Requires-Dist: opencv-python<5,>=4.10
 Requires-Dist: asyncpg<1,>=0.30
+Requires-Dist: aiosqlite<1,>=0.20
 
 # nl_processing
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_neon_exercise.py
RENAMED
|
@@ -90,3 +90,30 @@ async def mark_event(
         await conn.execute(mark_event_applied_query(table), event_id)
     except asyncpg.PostgresError as exc:
         raise DatabaseError(str(exc)) from exc
+
+
+async def atomic_apply_delta(
+    conn: asyncpg.Connection,  # type: ignore[type-arg]
+    score_table: str,
+    events_table: str,
+    user_id: str,
+    event_id: str,
+    source_word_id: int,
+    delta: int,
+) -> bool:
+    """Atomically check-apply-mark a score delta in one transaction."""
+    try:
+        async with conn.transaction():
+            already = await conn.fetchrow(check_event_applied_query(events_table), event_id)
+            if already is not None:
+                return False
+            await conn.fetchrow(
+                increment_score_query(score_table),
+                user_id,
+                source_word_id,
+                delta,
+            )
+            await conn.execute(mark_event_applied_query(events_table), event_id)
+            return True
+    except asyncpg.PostgresError as exc:
+        raise DatabaseError(str(exc)) from exc
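
The helper above folds the previous check / increment / mark sequence into one transaction, so a retried event cannot be double-counted. A minimal caller sketch with assumed values (the DSN, table names, and IDs are illustrative, not part of the package):

import asyncio

import asyncpg

from nl_processing.database.backend._neon_exercise import atomic_apply_delta


async def main() -> None:
    # Hypothetical connection and table names, for illustration only.
    conn = await asyncpg.connect(dsn="postgresql://user:secret@example-host/db")
    applied = await atomic_apply_delta(
        conn,
        "translate_scores",  # score_table (hypothetical name)
        "applied_events",    # events_table (hypothetical name)
        "user-123",          # user_id
        "event-0001",        # event_id
        42,                  # source_word_id
        1,                   # delta
    )
    # A second call with the same event_id returns False and applies nothing.
    print("applied:", applied)
    await conn.close()


asyncio.run(main())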
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_queries.py
RENAMED

@@ -133,6 +133,19 @@ def get_user_words_query(
     return query
 
 
+def count_user_words_query(language: str, word_type: str | None) -> str:
+    # Table name from Language enum value, not user input  # noqa: S608
+    query = f"""
+        SELECT COUNT(*) AS cnt
+        FROM user_words uw
+        JOIN words_{language} w ON uw.word_id = w.id
+        WHERE uw.user_id = $1 AND uw.language = $2
+    """  # noqa: S608
+    if word_type is not None:
+        query += " AND w.word_type = $3"
+    return query
+
+
 def increment_score_query(table: str) -> str:
     # Table name from Language enum values, not user input  # noqa: S608
     return f"""
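
For reference, this is roughly what the new query builder renders (the language and word-type values are just examples; the placeholders are asyncpg-style):

from nl_processing.database.backend._queries import count_user_words_query

# Without a word_type filter the query uses two placeholders.
print(count_user_words_query("nl", None))
#   SELECT COUNT(*) AS cnt
#   FROM user_words uw
#   JOIN words_nl w ON uw.word_id = w.id
#   WHERE uw.user_id = $1 AND uw.language = $2

# With a word_type filter, " AND w.word_type = $3" is appended.
print(count_user_words_query("nl", "noun"))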
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/abstract.py
RENAMED

@@ -53,6 +53,15 @@ class AbstractBackend(ABC):
         and random ordering.
         """
 
+    @abstractmethod
+    async def count_user_words(
+        self,
+        user_id: str,
+        language: str,
+        word_type: str | None = None,
+    ) -> int:
+        """Return total user-word associations for the given user and language."""
+
     @abstractmethod
     async def add_user_word(
         self,
@@ -103,6 +112,22 @@ class AbstractBackend(ABC):
     ) -> None:
         """Insert event_id into the applied_events table."""
 
+    @abstractmethod
+    async def apply_score_delta_atomic(
+        self,
+        score_table: str,
+        events_table: str,
+        user_id: str,
+        event_id: str,
+        source_word_id: int,
+        delta: int,
+    ) -> bool:
+        """Atomically check-apply-mark a score delta in one transaction.
+
+        Returns True if the delta was applied, False if event_id was already applied.
+        The entire operation (check + increment + mark) runs in a single transaction.
+        """
+
     @abstractmethod
     async def create_tables(
         self,
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/neon.py
RENAMED

@@ -1,8 +1,9 @@
-"""NeonBackend
+"""NeonBackend asyncpg implementation for Neon PostgreSQL."""
 
 import asyncpg
 
 from nl_processing.database.backend._neon_exercise import (
+    atomic_apply_delta,
     check_event,
     create_exercise_tables,
     get_scores,
@@ -14,6 +15,7 @@ from nl_processing.database.backend._queries import (
     CREATE_USER_WORDS,
     add_translation_link_query,
     add_word_query,
+    count_user_words_query,
     create_translations_table,
     create_words_table,
     get_user_words_query,
@@ -34,7 +36,6 @@ class NeonBackend(AbstractBackend):
         self._connection: asyncpg.Connection | None = None  # type: ignore[type-arg]
 
     async def _connect(self) -> asyncpg.Connection:  # type: ignore[type-arg]
-        """Return cached connection, creating it lazily on first call."""
         if self._connection is None:
             try:
                 self._connection = await asyncpg.connect(dsn=self._database_url)
@@ -43,13 +44,12 @@ class NeonBackend(AbstractBackend):
                 raise DatabaseError(str(exc)) from exc
             except OSError as exc:
                 raise DatabaseError(str(exc)) from exc
+        if self._connection is None:
+            raise DatabaseError("Database connection was not initialized")
         return self._connection
 
     async def create_tables(
-        self,
-        languages: list[str],
-        pairs: list[tuple[str, str]],
-        exercise_slugs: list[str],
+        self, languages: list[str], pairs: list[tuple[str, str]], exercise_slugs: list[str]
     ) -> None:
         conn = await self._connect()
         try:
@@ -78,11 +78,7 @@ class NeonBackend(AbstractBackend):
             return None
         return int(row["id"])
 
-    async def get_word(
-        self,
-        table: str,
-        normalized_form: str,
-    ) -> dict[str, str | int] | None:
+    async def get_word(self, table: str, normalized_form: str) -> dict[str, str | int] | None:
         conn = await self._connect()
         try:
             row = await conn.fetchrow(get_word_query(table), normalized_form)
@@ -96,24 +92,14 @@ class NeonBackend(AbstractBackend):
             "word_type": row["word_type"],
         }
 
-    async def add_translation_link(
-        self,
-        table: str,
-        source_id: int,
-        target_id: int,
-    ) -> None:
+    async def add_translation_link(self, table: str, source_id: int, target_id: int) -> None:
         conn = await self._connect()
         try:
             await conn.execute(add_translation_link_query(table), source_id, target_id)
         except asyncpg.PostgresError as exc:
             raise DatabaseError(str(exc)) from exc
 
-    async def add_user_word(
-        self,
-        user_id: str,
-        word_id: int,
-        language: str,
-    ) -> None:
+    async def add_user_word(self, user_id: str, word_id: int, language: str) -> None:
         conn = await self._connect()
         try:
             await conn.execute(ADD_USER_WORD, user_id, word_id, language)
@@ -144,6 +130,19 @@ class NeonBackend(AbstractBackend):
             raise DatabaseError(str(exc)) from exc
         return [dict(row) for row in rows]
 
+    async def count_user_words(self, user_id: str, language: str, word_type: str | None = None) -> int:
+        conn = await self._connect()
+        args: list[str] = [user_id, language]
+        if word_type is not None:
+            args.append(word_type)
+        try:
+            count = await conn.fetchval(count_user_words_query(language, word_type), *args)
+        except asyncpg.PostgresError as exc:
+            raise DatabaseError(str(exc)) from exc
+        if count is None:
+            return 0
+        return int(count)
+
     async def increment_user_exercise_score(
         self,
         table: str,
@@ -155,36 +154,29 @@ class NeonBackend(AbstractBackend):
         return await increment_score(conn, table, user_id, source_word_id, delta)
 
     async def get_user_exercise_scores(
-        self,
-        table: str,
-        user_id: str,
-        source_word_ids: list[int],
+        self, table: str, user_id: str, source_word_ids: list[int]
     ) -> list[dict[str, str | int]]:
         conn = await self._connect()
         return await get_scores(conn, table, user_id, source_word_ids)
 
-    async def check_event_applied(
-        self,
-        table: str,
-        event_id: str,
-    ) -> bool:
+    async def check_event_applied(self, table: str, event_id: str) -> bool:
         conn = await self._connect()
         return await check_event(conn, table, event_id)
 
-    async def mark_event_applied(
-        self,
-        table: str,
-        event_id: str,
-    ) -> None:
+    async def mark_event_applied(self, table: str, event_id: str) -> None:
         conn = await self._connect()
         await mark_event(conn, table, event_id)
 
+    async def apply_score_delta_atomic(
+        self, score_table: str, events_table: str,
+        user_id: str, event_id: str, source_word_id: int, delta: int,
+    ) -> bool:  # fmt: skip
+        conn = await self._connect()
+        return await atomic_apply_delta(conn, score_table, events_table, user_id, event_id, source_word_id, delta)
 
-def _infer_target_language(source_language: str) -> str:
-    """Infer the target language for translation lookups.
 
-
-    """
+def _infer_target_language(source_language: str) -> str:
+    """Infer the other language in the nl/ru pair."""
     if source_language == "nl":
         return "ru"
     return "nl"
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exercise_progress.py
RENAMED

@@ -119,25 +119,21 @@ class ExerciseProgressStore:
     ) -> None:
         """Apply a score delta idempotently, guarded by event deduplication.
 
-        Validates exercise_type. Skips if event_id was already applied.
+        Validates exercise_type and delta. Skips if event_id was already applied.
+        The check-increment-mark operation is atomic (single transaction).
         """
         self._validate_exercise_type(exercise_type)
-
-
-
-        )
-        if already_applied:
-            return
+        if delta not in (1, -1):
+            msg = f"delta must be +1 or -1, got {delta}"
+            raise ValueError(msg)
         table = self._score_tables[exercise_type]
-        await self._backend.
-            table,
-            self.
-
-
-
-
-            self._applied_events_table,
-            event_id,
+        await self._backend.apply_score_delta_atomic(
+            score_table=table,
+            events_table=self._applied_events_table,
+            user_id=self._user_id,
+            event_id=event_id,
+            source_word_id=source_word_id,
+            delta=delta,
         )
 
     def _validate_exercise_type(self, exercise_type: str) -> None:
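
From the caller's side the tightened contract looks roughly like this. The store construction is abbreviated, the IDs are placeholders, and the constructor arguments simply mirror the ones DatabaseCacheService uses later in this diff:

import asyncio

from nl_processing.core.models import Language
from nl_processing.database.exercise_progress import ExerciseProgressStore


async def main() -> None:
    store = ExerciseProgressStore(
        user_id="user-123",
        source_language=Language.NL,
        target_language=Language.RU,
        exercise_types=["translate"],
    )
    try:
        # Any delta other than +1 / -1 is rejected before the database is touched.
        await store.apply_score_delta(
            event_id="event-0001",
            source_word_id=42,
            exercise_type="translate",
            delta=5,
        )
    except ValueError as exc:
        print(exc)  # "delta must be +1 or -1, got 5" (assuming "translate" is a configured exercise type)


asyncio.run(main())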
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/service.py
RENAMED

@@ -140,6 +140,19 @@ class DatabaseService:
                 language=self._target_language,
             )
             pairs.append(WordPair(source=source, target=target))
+        if limit is None and not random:
+            total_count = await self._backend.count_user_words(
+                self._user_id,
+                self._source_language.value,
+                word_type=word_type.value if word_type else None,
+            )
+            if total_count > len(pairs):
+                excluded_count = total_count - len(pairs)
+                _logger.warning(
+                    "%d of %d words excluded from get_words() due to missing translations",
+                    excluded_count,
+                    total_count,
+                )
         return pairs
 
     @classmethod
nl_processing-0.4.0/nl_processing/database_cache/_local_store_queries.py

@@ -0,0 +1,58 @@
+"""DDL and query constants for the local SQLite cache store."""
+
+DDL_CACHED_WORD_PAIRS = """
+    CREATE TABLE IF NOT EXISTS cached_word_pairs (
+        source_word_id INTEGER PRIMARY KEY,
+        source_normalized_form TEXT NOT NULL,
+        source_word_type TEXT NOT NULL,
+        target_word_id INTEGER NOT NULL,
+        target_normalized_form TEXT NOT NULL,
+        target_word_type TEXT NOT NULL
+    )"""
+
+DDL_CACHED_SCORES = """
+    CREATE TABLE IF NOT EXISTS cached_scores (
+        source_word_id INTEGER NOT NULL,
+        exercise_type TEXT NOT NULL,
+        score INTEGER NOT NULL DEFAULT 0,
+        updated_at TEXT NOT NULL,
+        PRIMARY KEY (source_word_id, exercise_type)
+    )"""
+
+DDL_PENDING_SCORE_EVENTS = """
+    CREATE TABLE IF NOT EXISTS pending_score_events (
+        event_id TEXT PRIMARY KEY,
+        source_word_id INTEGER NOT NULL,
+        exercise_type TEXT NOT NULL,
+        delta INTEGER NOT NULL,
+        created_at TEXT NOT NULL,
+        flushed_at TEXT,
+        last_error TEXT
+    )"""
+
+DDL_CACHE_METADATA = """
+    CREATE TABLE IF NOT EXISTS cache_metadata (
+        id INTEGER PRIMARY KEY DEFAULT 1,
+        exercise_types TEXT NOT NULL,
+        schema_version INTEGER NOT NULL DEFAULT 1,
+        last_refresh_started_at TEXT,
+        last_refresh_completed_at TEXT,
+        last_flush_completed_at TEXT,
+        last_error TEXT
+    )"""
+
+ALL_DDL = [DDL_CACHED_WORD_PAIRS, DDL_CACHED_SCORES, DDL_PENDING_SCORE_EVENTS, DDL_CACHE_METADATA]
+
+UPSERT_SCORE = (
+    "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
+    " ON CONFLICT(source_word_id, exercise_type) DO UPDATE SET score = score + ?, updated_at = ?"
+)
+
+INSERT_PENDING_EVENT = (
+    "INSERT INTO pending_score_events (event_id, source_word_id, exercise_type, delta, created_at)"
+    " VALUES (?, ?, ?, ?, ?)"
+)
+
+INSERT_WORD_PAIR = "INSERT INTO cached_word_pairs VALUES (?, ?, ?, ?, ?, ?)"
+
+INSERT_SCORE = "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
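
One non-obvious detail: UPSERT_SCORE takes six parameters, four for the INSERT plus the delta and timestamp again for the conflict branch. A small standalone check using the stdlib sqlite3 module (in-memory database, illustrative values):

import sqlite3
from datetime import UTC, datetime

from nl_processing.database_cache._local_store_queries import DDL_CACHED_SCORES, UPSERT_SCORE

conn = sqlite3.connect(":memory:")
conn.execute(DDL_CACHED_SCORES)

now = datetime.now(tz=UTC).isoformat()
# First execution inserts score=1; the repeat hits ON CONFLICT and adds the delta.
conn.execute(UPSERT_SCORE, (42, "translate", 1, now, 1, now))
conn.execute(UPSERT_SCORE, (42, "translate", 1, now, 1, now))
print(conn.execute("SELECT score FROM cached_scores").fetchone())  # (2,)
conn.close()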
nl_processing-0.4.0/nl_processing/database_cache/exceptions.py

@@ -0,0 +1,10 @@
+class CacheNotReadyError(Exception):
+    """Raised when cached data is requested before the first usable snapshot exists."""
+
+
+class CacheStorageError(Exception):
+    """Raised when the local SQLite cache file cannot be opened, read, or updated."""
+
+
+class CacheSyncError(Exception):
+    """Raised when an explicit refresh or flush operation fails synchronously."""
nl_processing-0.4.0/nl_processing/database_cache/local_store.py

@@ -0,0 +1,200 @@
+"""SQLite data-access layer for the local word-pair / score cache."""
+
+from datetime import UTC, datetime
+import json
+import sqlite3
+
+import aiosqlite
+
+from nl_processing.database_cache._local_store_queries import (
+    ALL_DDL,
+    INSERT_PENDING_EVENT,
+    INSERT_SCORE,
+    INSERT_WORD_PAIR,
+    UPSERT_SCORE,
+)
+from nl_processing.database_cache.exceptions import CacheStorageError
+
+
+def _now() -> str:
+    return datetime.now(tz=UTC).isoformat()
+
+
+class LocalStore:
+    """Async SQLite store for cached word pairs, scores, and pending events."""
+
+    def __init__(self, db_path: str) -> None:
+        self._db_path = db_path
+        self._db: aiosqlite.Connection | None = None
+
+    @property
+    def _conn(self) -> aiosqlite.Connection:
+        if self._db is None:
+            raise CacheStorageError("LocalStore is not open")
+        return self._db
+
+    async def open(self) -> None:
+        """Open the SQLite connection and create tables."""
+        try:
+            self._db = await aiosqlite.connect(self._db_path)
+            self._db.row_factory = aiosqlite.Row
+            await self._db.execute("PRAGMA journal_mode=WAL")
+            for ddl in ALL_DDL:
+                await self._db.execute(ddl)
+            await self._db.commit()
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def close(self) -> None:
+        """Close the SQLite connection."""
+        if self._db:
+            await self._db.close()
+            self._db = None
+
+    async def get_cached_word_pairs(
+        self,
+        word_type: str | None = None,
+        limit: int | None = None,
+        *,
+        random: bool = False,
+    ) -> list[dict[str, str | int]]:
+        """Query cached word pairs with optional filter, limit, and random ordering."""
+        sql = "SELECT * FROM cached_word_pairs"
+        params: list[str | int] = []
+        if word_type is not None:
+            sql += " WHERE source_word_type = ?"
+            params.append(word_type)
+        if random:
+            sql += " ORDER BY RANDOM()"
+        if limit is not None:
+            sql += " LIMIT ?"
+            params.append(limit)
+        return await self._fetch_all(sql, params)
+
+    async def get_cached_word_pairs_with_scores(self, exercise_types: list[str]) -> list[dict[str, str | int]]:
+        """Query word pairs and attach scores per exercise type (missing = 0)."""
+        try:
+            rows = await self._fetch_all("SELECT * FROM cached_word_pairs")
+            for row in rows:
+                for et in exercise_types:
+                    sc = await self._conn.execute(
+                        "SELECT score FROM cached_scores WHERE source_word_id=? AND exercise_type=?",
+                        (row["source_word_id"], et),
+                    )
+                    score_row = await sc.fetchone()
+                    row[f"score_{et}"] = int(score_row["score"]) if score_row else 0
+            return rows
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def get_pending_events(self) -> list[dict[str, str | int]]:
+        return await self._fetch_all("SELECT * FROM pending_score_events WHERE flushed_at IS NULL ORDER BY created_at")
+
+    async def get_pending_event_count(self) -> int:
+        try:
+            cur = await self._conn.execute("SELECT COUNT(*) FROM pending_score_events WHERE flushed_at IS NULL")
+            row = await cur.fetchone()
+            return int(row[0]) if row else 0
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def get_metadata(self) -> dict[str, str | int] | None:
+        try:
+            cur = await self._conn.execute("SELECT * FROM cache_metadata WHERE id = 1")
+            row = await cur.fetchone()
+            return dict(row) if row else None
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def has_snapshot(self) -> bool:
+        try:
+            cur = await self._conn.execute("SELECT 1 FROM cached_word_pairs LIMIT 1")
+            return (await cur.fetchone()) is not None
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def record_score_and_event(
+        self,
+        source_word_id: int,
+        exercise_type: str,
+        delta: int,
+        event_id: str,
+    ) -> None:
+        """Atomically upsert a cached score and insert a pending event."""
+        now = _now()
+        try:
+            await self._conn.execute(UPSERT_SCORE, (source_word_id, exercise_type, delta, now, delta, now))
+            await self._conn.execute(INSERT_PENDING_EVENT, (event_id, source_word_id, exercise_type, delta, now))
+            await self._conn.commit()
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def rebuild_snapshot(
+        self,
+        word_pairs: list[tuple[int, str, str, int, str, str]],
+        scores: dict[tuple[int, str], int],
+    ) -> None:
+        """Atomically replace cached word pairs and scores, then reapply pending events."""
+        now = _now()
+        try:
+            await self._conn.execute("DELETE FROM cached_word_pairs")
+            await self._conn.execute("DELETE FROM cached_scores")
+            for wp in word_pairs:
+                await self._conn.execute(INSERT_WORD_PAIR, wp)
+            for (wid, et), score in scores.items():
+                await self._conn.execute(INSERT_SCORE, (wid, et, score, now))
+            for evt in await self.get_pending_events():
+                await self._conn.execute(
+                    UPSERT_SCORE,
+                    (evt["source_word_id"], evt["exercise_type"], evt["delta"], now, evt["delta"], now),
+                )
+            await self._conn.commit()
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def mark_event_flushed(self, event_id: str) -> None:
+        await self._exec_commit("UPDATE pending_score_events SET flushed_at=? WHERE event_id=?", (_now(), event_id))
+
+    async def mark_event_failed(self, event_id: str, error: str) -> None:
+        await self._exec_commit("UPDATE pending_score_events SET last_error=? WHERE event_id=?", (error, event_id))
+
+    async def update_metadata(self, **fields: str | int | None) -> None:
+        if not fields:
+            return
+        set_clause = ", ".join(f"{k} = ?" for k in fields)
+        await self._exec_commit(
+            f"UPDATE cache_metadata SET {set_clause} WHERE id = 1",  # noqa: S608
+            tuple(fields.values()),
+        )
+
+    async def ensure_metadata(self, exercise_types: list[str]) -> None:
+        await self._exec_commit(
+            "INSERT OR REPLACE INTO cache_metadata (id, exercise_types, schema_version) VALUES (1, ?, 1)",
+            (json.dumps(exercise_types),),
+        )
+
+    async def get_source_word_id(self, normalized_form: str, word_type: str) -> int | None:
+        """Look up a source_word_id from cached_word_pairs."""
+        try:
+            cur = await self._conn.execute(
+                "SELECT source_word_id FROM cached_word_pairs WHERE source_normalized_form=? AND source_word_type=?",
+                (normalized_form, word_type),
+            )
+            row = await cur.fetchone()
+            return int(row["source_word_id"]) if row else None
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def _fetch_all(self, sql: str, params: list[str | int] | None = None) -> list[dict[str, str | int]]:
+        try:
+            cur = await self._conn.execute(sql, params or [])
+            return [dict(row) for row in await cur.fetchall()]
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def _exec_commit(self, sql: str, params: tuple[str | int | None, ...]) -> None:
+        try:
+            await self._conn.execute(sql, params)
+            await self._conn.commit()
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
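
A minimal end-to-end sketch of the new store (the database path, word data, and event id are illustrative; it only exercises the calls defined above):

import asyncio

from nl_processing.database_cache.local_store import LocalStore


async def main() -> None:
    store = LocalStore("/tmp/nl_cache_example.db")  # illustrative path
    await store.open()
    await store.ensure_metadata(["translate"])
    # Seed one cached pair, then record a local score change plus its pending event.
    await store.rebuild_snapshot(
        word_pairs=[(1, "huis", "noun", 10, "дом", "noun")],
        scores={(1, "translate"): 0},
    )
    await store.record_score_and_event(1, "translate", 1, "event-0001")
    print(await store.get_pending_event_count())  # 1
    print(await store.get_cached_word_pairs(word_type="noun"))
    await store.close()


asyncio.run(main())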
nl_processing-0.4.0/nl_processing/database_cache/models.py

@@ -0,0 +1,12 @@
+from datetime import datetime
+
+from pydantic import BaseModel
+
+
+class CacheStatus(BaseModel):
+    is_ready: bool
+    is_stale: bool
+    has_snapshot: bool
+    pending_events: int
+    last_refresh_completed_at: datetime | None
+    last_flush_completed_at: datetime | None
nl_processing-0.4.0/nl_processing/database_cache/service.py

@@ -0,0 +1,185 @@
+"""DatabaseCacheService — public API for the local SQLite cache layer."""
+
+import asyncio
+from datetime import UTC, datetime, timedelta
+import json
+import tempfile
+from uuid import uuid4
+
+from nl_processing.core.models import Language, PartOfSpeech, Word
+from nl_processing.database.exercise_progress import ExerciseProgressStore
+from nl_processing.database.models import ScoredWordPair, WordPair
+from nl_processing.database_cache.exceptions import CacheNotReadyError
+from nl_processing.database_cache.local_store import LocalStore
+from nl_processing.database_cache.logging import get_logger
+from nl_processing.database_cache.models import CacheStatus
+from nl_processing.database_cache.sync import CacheSyncer
+
+_log = get_logger("service")
+
+
+class DatabaseCacheService:
+    """Offline-first cache backed by a local SQLite database."""
+
+    def __init__(
+        self,
+        *,
+        user_id: str,
+        source_language: Language,
+        target_language: Language,
+        exercise_types: list[str],
+        cache_ttl: timedelta,
+        cache_dir: str | None = None,
+    ) -> None:
+        if not exercise_types:
+            msg = "exercise_types must be a non-empty list"
+            raise ValueError(msg)
+        self._user_id = user_id
+        self._source_language = source_language
+        self._target_language = target_language
+        self._exercise_types = list(exercise_types)
+        self._cache_ttl = cache_ttl
+        base = cache_dir or tempfile.gettempdir()
+        self._db_path = f"{base}/{user_id}_{source_language.value}_{target_language.value}.db"
+        self._initialized = False
+        self._local: LocalStore | None = None
+        self._syncer: CacheSyncer | None = None
+
+    async def init(self) -> CacheStatus:
+        """Open local store, bootstrap or refresh as needed, return status."""
+        progress_store = ExerciseProgressStore(
+            user_id=self._user_id,
+            source_language=self._source_language,
+            target_language=self._target_language,
+            exercise_types=self._exercise_types,
+        )
+        self._local = LocalStore(self._db_path)
+        await self._local.open()
+        self._syncer = CacheSyncer(self._local, progress_store)
+        await self._local.ensure_metadata(self._exercise_types)
+        meta = await self._local.get_metadata()
+        if meta and json.loads(str(meta["exercise_types"])) != self._exercise_types:
+            await self._local.ensure_metadata(self._exercise_types)
+            await self._syncer.refresh()
+        elif not await self._local.has_snapshot():
+            await self._syncer.refresh()
+        elif self._is_stale(meta):
+            asyncio.create_task(self._background_refresh())
+        self._initialized = True
+        return await self.get_status()
+
+    async def get_words(
+        self,
+        *,
+        word_type: str | None = None,
+        limit: int | None = None,
+        random: bool = False,
+    ) -> list[WordPair]:
+        """Return cached word pairs, optionally filtered."""
+        self._ensure_ready()
+        assert self._local is not None
+        rows = await self._local.get_cached_word_pairs(word_type=word_type, limit=limit, random=random)
+        return [self._row_to_word_pair(r) for r in rows]
+
+    async def get_word_pairs_with_scores(self) -> list[ScoredWordPair]:
+        """Return cached word pairs with exercise scores."""
+        self._ensure_ready()
+        assert self._local is not None
+        rows = await self._local.get_cached_word_pairs_with_scores(self._exercise_types)
+        result: list[ScoredWordPair] = []
+        for row in rows:
+            pair = self._row_to_word_pair(row)
+            scores = {et: int(row[f"score_{et}"]) for et in self._exercise_types}
+            result.append(ScoredWordPair(pair=pair, scores=scores, source_word_id=int(row["source_word_id"])))
+        return result
+
+    async def record_exercise_result(self, *, source_word: Word, exercise_type: str, delta: int) -> None:
+        """Record a score change locally and queue for remote flush."""
+        self._ensure_ready()
+        assert self._local is not None
+        if exercise_type not in self._exercise_types:
+            msg = f"Unknown exercise_type '{exercise_type}'; expected one of {sorted(self._exercise_types)}"
+            raise ValueError(msg)
+        if delta not in (1, -1):
+            msg = f"delta must be +1 or -1, got {delta}"
+            raise ValueError(msg)
+        wid = await self._local.get_source_word_id(source_word.normalized_form, source_word.word_type.value)
+        if wid is None:
+            msg = f"Word '{source_word.normalized_form}' not found in cache"
+            raise ValueError(msg)
+        await self._local.record_score_and_event(wid, exercise_type, delta, str(uuid4()))
+        asyncio.create_task(self._background_flush())
+
+    async def refresh(self) -> None:
+        """Trigger a full cache refresh from the remote database."""
+        assert self._syncer is not None
+        await self._syncer.refresh()
+
+    async def flush(self) -> None:
+        """Flush pending score events to the remote database."""
+        assert self._syncer is not None
+        await self._syncer.flush()
+
+    async def get_status(self) -> CacheStatus:
+        """Build current cache status from metadata and pending events."""
+        assert self._local is not None
+        meta = await self._local.get_metadata()
+        has_snap = await self._local.has_snapshot()
+        pending = await self._local.get_pending_event_count()
+        last_refresh = _parse_dt(meta, "last_refresh_completed_at") if meta else None
+        last_flush = _parse_dt(meta, "last_flush_completed_at") if meta else None
+        return CacheStatus(
+            is_ready=self._initialized and has_snap,
+            is_stale=self._is_stale(meta),
+            has_snapshot=has_snap,
+            pending_events=pending,
+            last_refresh_completed_at=last_refresh,
+            last_flush_completed_at=last_flush,
+        )
+
+    def _ensure_ready(self) -> None:
+        if not self._initialized or self._local is None:
+            raise CacheNotReadyError("Cache not initialized — call init() first")
+
+    def _is_stale(self, meta: dict[str, str | int] | None) -> bool:
+        if not meta:
+            return True
+        last_refresh = _parse_dt(meta, "last_refresh_completed_at")
+        if last_refresh is None:
+            return True
+        return datetime.now(tz=UTC) - last_refresh > self._cache_ttl
+
+    def _row_to_word_pair(self, row: dict[str, str | int]) -> WordPair:
+        return WordPair(
+            source=Word(
+                normalized_form=str(row["source_normalized_form"]),
+                word_type=PartOfSpeech(row["source_word_type"]),
+                language=self._source_language,
+            ),
+            target=Word(
+                normalized_form=str(row["target_normalized_form"]),
+                word_type=PartOfSpeech(row["target_word_type"]),
+                language=self._target_language,
+            ),
+        )
+
+    async def _background_refresh(self) -> None:
+        try:
+            assert self._syncer is not None
+            await self._syncer.refresh()
+        except Exception:
+            _log.exception("background refresh failed")
+
+    async def _background_flush(self) -> None:
+        try:
+            assert self._syncer is not None
+            await self._syncer.flush(skip_if_running=True)
+        except Exception:
+            _log.exception("background flush failed")
+
+
+def _parse_dt(meta: dict[str, str | int], key: str) -> datetime | None:
+    val = meta[key] if key in meta else None
+    if val is None:
+        return None
+    return datetime.fromisoformat(str(val))
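
The intended call pattern, as far as this diff shows it (the values below are placeholders, and init() still needs the remote Neon database reachable so the first snapshot can be pulled):

import asyncio
from datetime import timedelta

from nl_processing.core.models import Language
from nl_processing.database_cache.service import DatabaseCacheService


async def main() -> None:
    cache = DatabaseCacheService(
        user_id="user-123",
        source_language=Language.NL,
        target_language=Language.RU,
        exercise_types=["translate"],
        cache_ttl=timedelta(hours=12),
        cache_dir="/tmp",  # illustrative; defaults to tempfile.gettempdir()
    )
    status = await cache.init()
    print(status.is_ready, status.pending_events)
    pairs = await cache.get_words(limit=10, random=True)
    print(len(pairs))


asyncio.run(main())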
nl_processing-0.4.0/nl_processing/database_cache/sync.py

@@ -0,0 +1,82 @@
+"""Refresh / flush orchestration for the local cache."""
+
+import asyncio
+from datetime import UTC, datetime
+
+from nl_processing.database.exercise_progress import ExerciseProgressStore
+from nl_processing.database_cache.exceptions import CacheSyncError
+from nl_processing.database_cache.local_store import LocalStore
+from nl_processing.database_cache.logging import get_logger
+
+_log = get_logger("sync")
+
+
+class CacheSyncer:
+    """Coordinates full refresh from remote and flush of pending events back to remote."""
+
+    def __init__(self, local_store: LocalStore, progress_store: ExerciseProgressStore) -> None:
+        self._local = local_store
+        self._remote = progress_store
+        self._refresh_lock = asyncio.Lock()
+        self._flush_lock = asyncio.Lock()
+
+    async def refresh(self) -> None:
+        """Pull a full snapshot from the remote database and rebuild the local cache."""
+        if self._refresh_lock.locked():
+            return
+        async with self._refresh_lock:
+            now = datetime.now(tz=UTC).isoformat()
+            try:
+                await self._local.update_metadata(last_refresh_started_at=now)
+                scored_pairs = await self._remote.export_remote_snapshot()
+                word_pairs: list[tuple[int, str, str, int, str, str]] = [
+                    (
+                        sp.source_word_id,
+                        sp.pair.source.normalized_form,
+                        sp.pair.source.word_type.value,
+                        0,
+                        sp.pair.target.normalized_form,
+                        sp.pair.target.word_type.value,
+                    )
+                    for sp in scored_pairs
+                ]
+                scores: dict[tuple[int, str], int] = {}
+                for sp in scored_pairs:
+                    for exercise_type, score in sp.scores.items():
+                        scores[(sp.source_word_id, exercise_type)] = score
+                await self._local.rebuild_snapshot(word_pairs, scores)
+                await self._local.update_metadata(
+                    last_refresh_completed_at=datetime.now(tz=UTC).isoformat(),
+                )
+            except CacheSyncError:
+                raise
+            except Exception as exc:
+                _log.exception("refresh failed")
+                await self._local.update_metadata(last_error=str(exc))
+                raise CacheSyncError(str(exc)) from exc
+
+    async def flush(self, *, skip_if_running: bool = False) -> None:
+        """Push pending local score events to the remote database.
+
+        Args:
+            skip_if_running: If True, return immediately if another flush is already running.
+                If False (default), wait for any running flush to complete.
+        """
+        if skip_if_running and self._flush_lock.locked():
+            return
+        async with self._flush_lock:
+            events = await self._local.get_pending_events()
+            for evt in events:
+                eid = str(evt["event_id"])
+                try:
+                    await self._remote.apply_score_delta(
+                        event_id=eid,
+                        source_word_id=int(evt["source_word_id"]),
+                        exercise_type=str(evt["exercise_type"]),
+                        delta=int(evt["delta"]),
+                    )
+                    await self._local.mark_event_flushed(eid)
+                except Exception as exc:
+                    _log.warning("flush failed for event %s: %s", eid, exc)
+                    await self._local.mark_event_failed(eid, str(exc))
+            await self._local.update_metadata(last_flush_completed_at=datetime.now(tz=UTC).isoformat())
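
The skip_if_running flag in flush() mirrors a common asyncio pattern: peek at the lock before awaiting it so background triggers never queue up behind an in-flight flush. A stripped-down illustration of just that pattern (not package code):

import asyncio

lock = asyncio.Lock()


async def flush(*, skip_if_running: bool = False) -> str:
    if skip_if_running and lock.locked():
        return "skipped"          # background callers bail out immediately
    async with lock:
        await asyncio.sleep(0.1)  # stand-in for pushing pending events
        return "flushed"


async def main() -> None:
    first = asyncio.create_task(flush())
    await asyncio.sleep(0)                     # let the first flush grab the lock
    print(await flush(skip_if_running=True))   # "skipped"
    print(await first)                         # "flushed"


asyncio.run(main())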
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py
RENAMED

@@ -6,7 +6,7 @@ Usage:
 This script:
 1. Generates synthetic test images and encodes real photos
 2. Encodes them to base64
-3. Builds a ChatPromptTemplate with
+3. Builds a ChatPromptTemplate with 7 few-shot examples (HumanMessage + AIMessage + ToolMessage triplets)
 4. Serializes with dumpd() and saves to nl.json
 
 The script is the source of truth — nl.json is the generated artifact.
@@ -92,6 +92,9 @@ EXAMPLE_5_EXPECTED = ""
 EXAMPLE_6_TEXT = "Please take your shoes off before entering the house"
 EXAMPLE_6_EXPECTED = ""
 
+EXAMPLE_7_TEXT = "Remember to bring your umbrella tomorrow"
+EXAMPLE_7_EXPECTED = ""
+
 OUTPUT_PATH = Path(__file__).parent / "nl.json"
 
 
@@ -128,13 +131,14 @@ def _make_example_ai(expected_text: str, call_id: str) -> AIMessage:
 
 
 def build_prompt() -> ChatPromptTemplate:
-    """Build the Dutch extraction prompt with
+    """Build the Dutch extraction prompt with 7 few-shot examples."""
     img1 = _generate_image_b64(EXAMPLE_1_TEXT)
     img2 = _generate_image_b64(EXAMPLE_2_TEXT)
     img3 = _encode_existing_image_b64(EXAMPLE_3_IMAGE)
     img4 = _encode_existing_image_b64(EXAMPLE_4_IMAGE)
     img5 = _generate_image_b64(EXAMPLE_5_TEXT)
     img6 = _generate_image_b64(EXAMPLE_6_TEXT)
+    img7 = _generate_image_b64(EXAMPLE_7_TEXT)
 
     return ChatPromptTemplate.from_messages([
         SystemMessage(content=SYSTEM_INSTRUCTION),
@@ -156,6 +160,9 @@ def build_prompt() -> ChatPromptTemplate:
         _make_example_human(img6),
         _make_example_ai(EXAMPLE_6_EXPECTED, "call_example_6"),
         ToolMessage(content=EXAMPLE_6_EXPECTED, tool_call_id="call_example_6"),
+        _make_example_human(img7),
+        _make_example_ai(EXAMPLE_7_EXPECTED, "call_example_7"),
+        ToolMessage(content=EXAMPLE_7_EXPECTED, tool_call_id="call_example_7"),
         MessagesPlaceholder(variable_name="images"),
     ])
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/nl.json
RENAMED
|
@@ -398,6 +398,68 @@
       "status": "success"
     }
   },
+  {
+    "lc": 1,
+    "type": "constructor",
+    "id": [
+      "langchain",
+      "schema",
+      "messages",
+      "HumanMessage"
+    ],
+    "kwargs": {
+      "content": [
+        {
+          "type": "image_url",
+          "image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAyAAAADICAIAAACf7RJNAAAXuklEQVR4Ae3BAYor14IFwcz9LzoHLggk1KXu51/tsfGJsGJmZmZm7mPFzMzMzNzHipmZmZm5jxUzMzMzcx8rZmZmZuY+VszMzMzMfayYmZmZmftYMTMzMzP3sWJmZmZm7mPFzMzMzNzHipmZmZm5jxUzMzMzcx8rZmZmZuY+VszMf4nKUTEzM7/Aipuo/EDF/D9RK/5L1Ip/NrXib6HypGLm30Ot+FdRKy6oFf9IasX8JVbcQeUPVczfSOWo+G9QOSr+qVSOit+n8qRi5l9C5aj4l1A5Kt6oHBX/MCpHxfw5K+6g8ucq5u+iclT8N6gcFf9UKkfF71N5UjHzL6FyVPxLqBwVb1SOin8YlaNi/pwVd1A5Kj5SeaiYv4vKUfHfoHJU/FOpHBV/C5WjYubfQ+Wo+JdQOSreqBwV/zAqR8X8OSvuoHJUfEfloWL+FipHxX+DylHxT6VyVMzMNZWj4l9C5ah4o3JU/MOoHBXz56y4g8pR8QMqR8X8LVSOiv8GlaPin0rlqJiZaypHxb+EylHxRuWo+IdROSrmz1lxB5Wj4mdUjor5fSpHxX+DylHxT6VyVMzMNZWj4l9C5ah4o3JU/MOoHBXz56y4g8pR8TMqR8X8PpWj4r9B5aj4p1I5KmbmmspR8S+hclS8UTkq/mFUjor5c1bcQeWo+BmVo+KayquK76gVDyqvKt6ovKr4EypPKr6jVjxReVLxFZUnFR+pHBWHypOKP6TyquI7asWDypOK/43KhYrvqLyq+DUqRwWovKr4AbXiQeVJxRO14oJa8aDyquJPqDypeFArbqJW/IxacU3lVcUPqBXfUSu+olY8UXlScQe14jtqxQW14onKk4qvqDyp+EjlqACVVxU/oFY8qDypuKbypOIjlaPijcpR8R2VJxWHWnEflQsV31F5UvEDasUTlScVX1F5VfEnVJ5U/IBa8UTliRV3UDkqfkDlqLigcq3igspRqVyrOFSuVXxH5ULFBZWjAlQuVDyoXKi4oHJUKhcqfkDlWsUFlaNS+UrFX6XyUcUFlWsVv0DlByquqRyVylcqDpWj4o3KUalcq/iOyoVK5aj4n6kcFd9ROSreqFyruKZyVFxTOSpeqRwVoPKViv+NylFxTeWoeKNyVIDKhYoHlQsVF1R+oOKaylGpfKXijcqFigsqR8UblaPimsp3Ku6g8lHFBZULFddUjgpQuVDxoHKt4jsqFyquqRwVoPLGijuoHBXfUXmo+IrKdyq+ovJjlcp3Kq6pfFTxFZWjUvmoAlQ+qviKys9UfKTynYqvqHxU8b9RuVBxQeU7FXdT+ZmKCyofVTyoHBVvVH6s4prKz1T8z1QeKq6pPFS8UvlOxQWVo+KaylHxSuWoVL5S8T9TOSquqRwVb1SOSuWjClD5qOIrKj9TcUHlo4o3Kh9VfEXlqHijclRcUPmBipuoXKi4oPJRxQWVo1L5qAJUvlNxTeWjigsqR6XyFSvuoHJUXFN5UvEVlScVDypPKt6ovKl4ULlQ8aDyUHFB5aHiicpDxRuVNxUPKg+VykPFE5WHijcqryoeVJ5UXFB5UvGg8qTijcqbil+gclR8pPKk4kHlScWtVF5VPKg8qfiKypuKr6gcFW9U3lQ8qDxUXFB5qHii8qriDipHxTWVo+KVypOKB5UnFV9ROSquqRwVr1TeVNxN5ai4pnJUvFF5U/Gg8lCpPFQ8qDxUfEXlVcWDypOKr6i8qbim8lDxoPKk4o3KUfFG5aj4ispDxYPKq4pbqRwV31F5qHii8lDxFZU3FU9ULlQ8qDxUXFB5qHhQeVLxFZU3FU+suIPKH6r4ispDxVdUjoo3Kk8q3qi8qnijclR8ReWh4isqR8UrlVcVb1ReVbxROSreqDypeKPyUPFG5aHiKypHxRuVJxW/RuWouKbyUPEVlYeK+6g8VHxF5aHijcqTimsqR8UblScVb1SOiq+oPFS8UXlScQeVo+KCykPFE5WHiq+oPFS8UTkqrqkcFa9UXlX8ApWj4prKUfFG5UnFG5U3FW9Ujoo3Kg8VX1F5qHij8qTimsqTijcqDxWvVI6KNypHxSuVJxVvVB4qbqVyVHyk8lDxRuVJxSuVVxVvVF5VvFE5Kt6oPKl4o/Kk4pXKq4pXVtxB5Q9VfEXloeKCylHxSuWh4isqTyouqBwVr1QeKi6oHBWvVJ5UfEXlScVXVI6KNyoPFRdUHipeqTxUXFA5Kl6pPFT8JpWj4prKUXFB5aHiPipHxQWVh4o3Kg8VH6kcFW9UHiouqBwVr1QeKi6oPFTcROWo+IrKUfFK5ai4oPJQ8UblqLimclS8UnlS8TtUjoprKkfFG5UnFV9ReVLxFZWj4o3KQ8UFlaPijcqTimsqDxUXVI6KVypHxRuVo+KVykPFBZWj4lYqR8U1lYeKCyoPFa9UnlR8ReVJxVdUHipeqTxUXFB5qHil8qTijRV3UPmBio9UHiquqRwVr1SOimsqR8U1laPilcpR8ZHKUfFE5aHigspDxQWVh4pXKkfFRypHxROVh4prKkfFK5WHit+kclRcUHmouKbyUHETlaPimspDxSuVh4qPVI6KNypHxTWVo+KVylHxkcpRcROVo+IrKkfFE5WHimsqDxWvVI6KaypHxSuVh4pfo3JUXFM5Kt6oPFRcUHmouKByVLxROSquqTxUvFJ5qPhI5aj4SOWoeKJyVLxROSpeqRwV11SOilupHBXXVI6Kj1SOilcqDxUXVB4qrqkcFa9UjoqPVI6KVyoPFV+x4g4qR8UblaPiI5U/VPFK5ai4pnJUXFM5Kl6p/KGKJypHxTWVo+IjlaPilcpR8ZHKUfFE5Q9VvFI5Kn6ZylFxQeWo+I7KUXETlaPiI5Wj4pXKUfEdlaPijcpRcU3lqHilclR8pHJU3ETloeKVylHxSuWo+I7KUfFK5ai4pnJUvFJ5qPg1KkfFNZWj4o3KUXFN5ai4pnJUvFE5Kj5SOSpeqRwVH6n8uYonKkfFG5Wj4onKQ8U1laPiVipHxTWVo+IjlYeKJypHxUcqR8U1laPiicpDxUcqDxVPVB4qvmLFHVSOijcqDxXXVP5QxSuVo+KaylFxTeWoeKLy5yqeqBwV11SOio9UjopXKkfFRyoPFQ8qf6jilcpR8ctUjooLKkfFd1SOipuoHBUfqRwVr1SOiu+oHBVvVI6KaypHxROVh4qPVI6K+6gcFa9UjopXKkfFd1SOilcqR8U1laPilcpDxa9ROSquqRwVb1SOimsqR8U1laPijcpR8ZHKUfFK5aj4SOXPVTxROSreqBwVT1SOio9UjopbqRwVF1QeKr6jclQ8UTkqPlI5Kq6pHBVPVI6KH1A5Kp6oHBUXrLiDylHxFZWj4prKn6h4o3JUXFM5Kq6pHBVPVP5QxSuVo+KaylHxkcpR8UrlqPiOylHxoPInKt6oHBW/TOWouKByVHxH5ai4icpR8ZHKUfFK5aj4jspR8UblqLimclQ8U
TkqvqNyVNxH5aHiQeWh4pXKUfEdlaPilcpRcU3lqHilclT8JpWj4prKUfFG5ai4pnJUXFM5Kt6oHBUfqRwVr1SOio9U/lDFK5Wj4o3KUfFE5aj4SOWouJXKUXFB5aj4AZWj4onKUfGRylFxTeWoeKJyVPyAylHxROWouGDFHVSOiq+oPFRcUDkq/iqVo+KaylFxTeWoeKJyVPxVKkfFNZWj4iOVo+KVylHxHZWj4kHlqPirVI6KX6ZyVFxQOSq+o3JU3ETlqPhI5ah4pXJUfEflqHijclRcUzkqnqgcFd9ROSpupXJUPKgcFW9UjorvqBwVr1SOimsqR8UrlaPiN6kcFddUjoo3KkfFNZWj4prKUfFG5aj4SOWoeKVyVHykclT8VSpHxRuVo+KJylHxkcpRcSuVo+KCylHxAypHxROVo+IjlaPimspR8UTlqPgBlaPiicpRccGKO6gcFRdUHiq+onJU/FUqR8U1laPimspR8UTloeIvUTkqrqkcFR+pHBWvVI6Kj1QeKh5Ujoq/SuWo+GUqR8UFlaPiOypHxU1UjoqPVI6KVypHxXdUjoo3KkfFNZWj4onKQ8VHKkfFrVSOikPloeKNylHxHZWj4pXKUXFN5ah4pXJU/CaVo+KaylHxRuWouKZyVFxTOSreqBwVH6kcFa9UjoqPVI6Kv0rlqHijclQ8UTkqPlI5Km6lclRcUHmo+I7KUfFE5aj4SOWouKZyVDxROSp+QOWoeKJyVFyw4g4qR8U1laPiKypHxV+lclRcUzkqrqkcFa9Ujoq/ROWouKZyVHykclS8UjkqPlI5Kp6oHBV/lcpR8ctUjooLKkfFd1SOipuoHBUfqRwVr1SOiu+oHBVvVI6KaypHxSuVo+IjlaPibipHBagcFV9ROSq+o3JUvFI5Kq6pHBWvVI6K36RyVFxTOSreqBwV11SOimsqR8UblaPiI5Wj4pXKUfEdlaPiL1E5Kt6oHBVPVB4qrqkcFbdSOSquqRwVH6k8VDxROSo+UjkqrqkcFa9UjoqPVI6KVypHxQUr7qByVFxTeah4o/JQ8ZFa8UblqLimclRcUzkqXqkcFd9RK16pHBXXVI6Kj1SOilcqR8VHKkfFE5WHio/UijcqR8UvUzkqLqg8VFxTeai4icpRcU3loeKVylHxHZWj4o3KUXFN5ah4pXJUfKRyVNxN5agAlaPiKyoPFddUHipeqRwVF1QeKl6pHBW/SeWouKZyVLxROSquqRwV11SOijcqR8U1lYeKVypHxXdUjorvqBWvVI6KNypHxSuVo+KaylFxK5Wj4prKUfGRylHxSuWo+EjlqLimclS8UjkqPlI5Kl6pHBUXrLiDylHxkcpDxRuVo+KaylHxSuWouKZyVFxTOSpeqTxUXFM5Kp6oHBXXVI6Kj1SOilcqR8U1lYeKVypHxTWVo+KVylHxy1SOimsqR8U1laPiPipHxTWVo+KNylHxHZWj4o3KUXFN5ah4pfJQcU3lqLibypuKaypHxTWVo+KNylHxFZUnFa9UjorfpPJQ8RWVh4o3KkfFNZWj4prKUfFG5ai4pnJUvFE5Kr6jclR8pHJUPFE5Kt6oHBWvVI6KaypHxa1UHiouqDxUXFM5Kl6pHBUfqRwV11SOilcqR8VHKkfFK5Wj4oIVd1A5Kj5Seah4o/JQ8RWVh4pXKkfFNZWj4prKUfFG5aHiKyoPFU9UjoprKkfFRypHxSuVh4qvqDxUvFF5qPiKykPFK5Wj4pepHBXXVB4qvqLyUHEflYeKr6g8VLxROSq+o3JUvFE5Kq6pHBVvVB4qvqLyUPELVF5VXFN5qPiKykPFG5Wj4o3Kq4pXKkfFL1M5Kt6oPKl4o3JUXFM5Kq6pHBVvVB4qvqLyUPFG5aj4jspDxQWVo+KVylHxRuWoeKXyUPEVlYeKu6kcFddUHiq+ovJQ8UrlqPhI5ai4pnJUvFJ5qLig8lDxSuWouGDFHVSOiu+oPFS8UXlS8UTloeKNylFxTeWouKZyVHxF5aHilcpDxSuVo+KaylHxkcpR8UrlVcUTlYeKCypPKp6oPFS8UTkqfpnKQ8U1lScVT1QeKm6l8qriicpDxVdUjorvqBwVb1SOimsqR8VXVB4qXqk8qfgFKq8qPlJ5UvFE5aHiKyoPFU9U3lS8UjkqfpnKQ8UTlVcVb1SOimsqR8U1laPijcqriicqDxVfUTkqfkDlScUTlScVr1SOijcqR8UblScVT1SeVNxN5aHimspDxSuVh4o3KkfFRypHxTWVo+KNypOKJypPKt6oHBUXrLiDylHxAypHxVdUvlPxFZWj4prKUXFN5aj4isoPVLxROSquqRwVH6kcFa9Ufqzimsp3Kr6iclT8PpU3FW9UvlNxN5WfqbigclR8R+WoeKNyVFxTOSq+ovJjFb9D5aHiB1S+U3FN5ccqXqkcFb9M5ccq3qgcFddUjoprKkfFG5WfqbigclT8jMoPVLxROSreqBwVX1H5mYq7qbypeKPyAxVfUTkqPlI5Kq6pHBVfUfmBiq+oHBUXrLiDylHxAyoPFV9R+ajiKypHxTWVo+KaylFxQeWjiq+oHBXXVI6Kj1SOilcqP1PxHZWPKr6iclT8PpWvVLxS+ajiF6j8QMU1laPiOypHxRuVo+KaylFxQeVnKn6HykPFD6h8VPGRykeVylHxSuWo+H0qP1PxRuWouKZyVFxTOSreqPxMxQWVo+LHVD6q+IrKUfFG5ai4oPIDFb9A5U3FG5WPKi6oHBUfqRwV11SOigsqH1VcUDkqLlhxE7Xix1Sg4iOVVxXfUSu+o1Z8R634AZUnFd9RK76jVvyAWvEVteJQeVXxh1ReVXxHrfgbqTypuKbyquI3qRWHyquKH1ArfkatuKBWfEet+AGVJxWHylHxO1SOij+k8qrix1ReVTxRK76iVvyNVF5VPKgVF9SK76gV31ErLqgVh8qrih9QK/6cyquK76gVF9SK76i8qjjUit+k8lDxkcqTih9QK35ArfiOWvEdlVcVP6BWXLNiZub/g8pR8QtUjoqZmb+RFTMz/x9UjopfoHJUzMz8jayYmfnbqRwVv0DlqJiZ+XtZMTPzt1M5Ku6m8lAxM/P3smJm5neoFW9UHipupfKkYmbm72XFzMwvUPlOxU1U3lTMzPztrJiZ+R0q1ypuovKmYmbm/4MVMzO/RuVNxd1UHipmZv7/WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbm
PlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPv8HPt/pW2VCUvMAAAAASUVORK5CYII="
+          }
+        }
+      ],
+      "type": "human"
+    }
+  },
+  {
+    "lc": 1,
+    "type": "constructor",
+    "id": [
+      "langchain",
+      "schema",
+      "messages",
+      "AIMessage"
+    ],
+    "kwargs": {
+      "content": "",
+      "type": "ai",
+      "tool_calls": [
+        {
+          "name": "ExtractedText",
+          "args": {
+            "text": ""
+          },
+          "id": "call_example_7",
+          "type": "tool_call"
+        }
+      ],
+      "invalid_tool_calls": []
+    }
+  },
+  {
+    "lc": 1,
+    "type": "constructor",
+    "id": [
+      "langchain",
+      "schema",
+      "messages",
+      "ToolMessage"
+    ],
+    "kwargs": {
+      "content": "",
+      "type": "tool",
+      "tool_call_id": "call_example_7",
+      "status": "success"
+    }
+  },
   {
     "lc": 1,
     "type": "constructor",
nl_processing-0.4.0/nl_processing/translate_word/__init__.py
File without changes
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nl_processing
-Version: 0.3.0
+Version: 0.4.0
 Summary: Natural language processing playground
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: langchain<1,>=0.3
 Requires-Dist: langchain-openai<1,>=0.3
 Requires-Dist: opencv-python<5,>=4.10
 Requires-Dist: asyncpg<1,>=0.30
+Requires-Dist: aiosqlite<1,>=0.20
 
 # nl_processing
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/SOURCES.txt
RENAMED

@@ -12,7 +12,6 @@ nl_processing/core/models.py
 nl_processing/core/prompts.py
 nl_processing/core/scripts/prompt_author.py
 nl_processing/database/__init__.py
-nl_processing/database/cached_service.py
 nl_processing/database/exceptions.py
 nl_processing/database/exercise_progress.py
 nl_processing/database/logging.py
@@ -24,6 +23,14 @@ nl_processing/database/backend/_neon_exercise.py
 nl_processing/database/backend/_queries.py
 nl_processing/database/backend/abstract.py
 nl_processing/database/backend/neon.py
+nl_processing/database_cache/__init__.py
+nl_processing/database_cache/_local_store_queries.py
+nl_processing/database_cache/exceptions.py
+nl_processing/database_cache/local_store.py
+nl_processing/database_cache/logging.py
+nl_processing/database_cache/models.py
+nl_processing/database_cache/service.py
+nl_processing/database_cache/sync.py
 nl_processing/extract_text_from_image/__init__.py
 nl_processing/extract_text_from_image/benchmark.py
 nl_processing/extract_text_from_image/image_encoding.py
{nl_processing-0.3.0 → nl_processing-0.4.0}/pyproject.toml
RENAMED

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nl_processing"
-version = "0.3.0"
+version = "0.4.0"
 description = "Natural language processing playground"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -14,6 +14,7 @@ dependencies = [
     "langchain-openai>=0.3,<1",
     "opencv-python>=4.10,<5",
     "asyncpg>=0.30,<1",
+    "aiosqlite>=0.20,<1",
 ]
 
 [tool.setuptools.packages.find]
nl_processing-0.3.0/nl_processing/database/cached_service.py

@@ -1,82 +0,0 @@
-"""CachedDatabaseService — wraps DatabaseService with in-memory LRU cache.
-
-.. deprecated::
-    Legacy prototype helper; superseded by planned database_cache module.
-"""
-
-from nl_processing.core.models import Language, PartOfSpeech, Word
-from nl_processing.database.models import AddWordsResult, WordPair
-from nl_processing.database.service import DatabaseService
-
-
-class CachedDatabaseService:
-    """Wraps DatabaseService with an in-memory LRU cache for get_words.
-
-    .. deprecated::
-        Legacy prototype helper; superseded by planned database_cache module.
-    """
-
-    def __init__(
-        self,
-        *,
-        user_id: str,
-        source_language: Language = Language.NL,
-        target_language: Language = Language.RU,
-        cache_max_size: int = 128,
-    ) -> None:
-        self._inner = DatabaseService(
-            user_id=user_id,
-            source_language=source_language,
-            target_language=target_language,
-        )
-        self._cache: dict[tuple[str | None, int | None, bool], list[WordPair]] = {}
-        self._cache_max_size = cache_max_size
-        self._cache_order: list[tuple[str | None, int | None, bool]] = []
-
-    async def add_words(self, words: list[Word]) -> AddWordsResult:
-        """Delegate to inner service and clear the cache."""
-        result = await self._inner.add_words(words)
-        self._cache.clear()
-        self._cache_order.clear()
-        return result
-
-    async def get_words(
-        self,
-        *,
-        word_type: PartOfSpeech | None = None,
-        limit: int | None = None,
-        random: bool = False,
-    ) -> list[WordPair]:
-        """Return word pairs, serving from cache when possible.
-
-        Random queries and zero-size caches bypass the cache entirely.
-        """
-        if random or self._cache_max_size <= 0:
-            return await self._inner.get_words(
-                word_type=word_type,
-                limit=limit,
-                random=random,
-            )
-
-        key = (word_type.value if word_type else None, limit, False)
-        if key in self._cache:
-            self._cache_order.remove(key)
-            self._cache_order.append(key)
-            return self._cache[key]
-
-        result = await self._inner.get_words(
-            word_type=word_type,
-            limit=limit,
-            random=random,
-        )
-        self._cache[key] = result
-        self._cache_order.append(key)
-        while len(self._cache_order) > self._cache_max_size:
-            oldest = self._cache_order.pop(0)
-            self._cache.pop(oldest, None)
-        return result
-
-    @classmethod
-    async def create_tables(cls) -> None:
-        """Delegate to DatabaseService.create_tables."""
-        await DatabaseService.create_tables()
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/__init__.py
RENAMED
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/benchmark.py
RENAMED
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/image_encoding.py
RENAMED
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/prompts/nl.json
RENAMED
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|