nl-processing 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nl_processing-0.3.0 → nl_processing-0.4.0}/PKG-INFO +2 -1
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_neon_exercise.py +27 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_queries.py +13 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/abstract.py +25 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/neon.py +33 -41
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exercise_progress.py +12 -16
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/service.py +13 -0
- nl_processing-0.4.0/nl_processing/database_cache/_local_store_queries.py +58 -0
- nl_processing-0.4.0/nl_processing/database_cache/exceptions.py +10 -0
- nl_processing-0.4.0/nl_processing/database_cache/local_store.py +200 -0
- nl_processing-0.4.0/nl_processing/database_cache/logging.py +5 -0
- nl_processing-0.4.0/nl_processing/database_cache/models.py +12 -0
- nl_processing-0.4.0/nl_processing/database_cache/service.py +185 -0
- nl_processing-0.4.0/nl_processing/database_cache/sync.py +82 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py +9 -2
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/nl.json +62 -0
- nl_processing-0.4.0/nl_processing/translate_word/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/PKG-INFO +2 -1
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/SOURCES.txt +8 -1
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/requires.txt +1 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/pyproject.toml +2 -1
- nl_processing-0.3.0/nl_processing/database/cached_service.py +0 -82
- {nl_processing-0.3.0 → nl_processing-0.4.0}/README.md +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/exceptions.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/models.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/prompts.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/core/scripts/prompt_author.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exceptions.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/logging.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/models.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/testing.py +0 -0
- {nl_processing-0.3.0/nl_processing/extract_words_from_text → nl_processing-0.4.0/nl_processing/database_cache}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/benchmark.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/image_encoding.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/service.py +0 -0
- {nl_processing-0.3.0/nl_processing/sampling → nl_processing-0.4.0/nl_processing/extract_words_from_text}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/prompts/generate_nl_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/prompts/nl.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/service.py +0 -0
- {nl_processing-0.3.0/nl_processing/translate_text → nl_processing-0.4.0/nl_processing/sampling}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/sampling/service.py +0 -0
- {nl_processing-0.3.0/nl_processing/translate_word → nl_processing-0.4.0/nl_processing/translate_text}/__init__.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/prompts/generate_nl_ru_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/prompts/nl_ru.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_text/service.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/prompts/generate_nl_ru_prompt.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/prompts/nl_ru.json +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/translate_word/service.py +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/dependency_links.txt +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/top_level.txt +0 -0
- {nl_processing-0.3.0 → nl_processing-0.4.0}/setup.cfg +0 -0
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nl_processing
-Version: 0.3.0
+Version: 0.4.0
 Summary: Natural language processing playground
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: langchain<1,>=0.3
 Requires-Dist: langchain-openai<1,>=0.3
 Requires-Dist: opencv-python<5,>=4.10
 Requires-Dist: asyncpg<1,>=0.30
+Requires-Dist: aiosqlite<1,>=0.20
 
 # nl_processing
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_neon_exercise.py
RENAMED
|
@@ -90,3 +90,30 @@ async def mark_event(
         await conn.execute(mark_event_applied_query(table), event_id)
     except asyncpg.PostgresError as exc:
         raise DatabaseError(str(exc)) from exc
+
+
+async def atomic_apply_delta(
+    conn: asyncpg.Connection,  # type: ignore[type-arg]
+    score_table: str,
+    events_table: str,
+    user_id: str,
+    event_id: str,
+    source_word_id: int,
+    delta: int,
+) -> bool:
+    """Atomically check-apply-mark a score delta in one transaction."""
+    try:
+        async with conn.transaction():
+            already = await conn.fetchrow(check_event_applied_query(events_table), event_id)
+            if already is not None:
+                return False
+            await conn.fetchrow(
+                increment_score_query(score_table),
+                user_id,
+                source_word_id,
+                delta,
+            )
+            await conn.execute(mark_event_applied_query(events_table), event_id)
+            return True
+    except asyncpg.PostgresError as exc:
+        raise DatabaseError(str(exc)) from exc
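
The helper above folds the previous check / increment / mark sequence into one transaction, so a retried event cannot be double-counted. A minimal caller sketch with assumed values (the DSN, table names, and IDs are illustrative, not part of the package):

import asyncio

import asyncpg

from nl_processing.database.backend._neon_exercise import atomic_apply_delta


async def main() -> None:
    # Hypothetical connection and table names, for illustration only.
    conn = await asyncpg.connect(dsn="postgresql://user:secret@example-host/db")
    applied = await atomic_apply_delta(
        conn,
        "translate_scores",  # score_table (hypothetical name)
        "applied_events",    # events_table (hypothetical name)
        "user-123",          # user_id
        "event-0001",        # event_id
        42,                  # source_word_id
        1,                   # delta
    )
    # A second call with the same event_id returns False and applies nothing.
    print("applied:", applied)
    await conn.close()


asyncio.run(main())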
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/_queries.py
RENAMED

@@ -133,6 +133,19 @@ def get_user_words_query(
     return query
 
 
+def count_user_words_query(language: str, word_type: str | None) -> str:
+    # Table name from Language enum value, not user input  # noqa: S608
+    query = f"""
+        SELECT COUNT(*) AS cnt
+        FROM user_words uw
+        JOIN words_{language} w ON uw.word_id = w.id
+        WHERE uw.user_id = $1 AND uw.language = $2
+    """  # noqa: S608
+    if word_type is not None:
+        query += " AND w.word_type = $3"
+    return query
+
+
 def increment_score_query(table: str) -> str:
     # Table name from Language enum values, not user input  # noqa: S608
     return f"""
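
For reference, this is roughly what the new query builder renders (the language and word-type values are just examples; the placeholders are asyncpg-style):

from nl_processing.database.backend._queries import count_user_words_query

# Without a word_type filter the query uses two placeholders.
print(count_user_words_query("nl", None))
#   SELECT COUNT(*) AS cnt
#   FROM user_words uw
#   JOIN words_nl w ON uw.word_id = w.id
#   WHERE uw.user_id = $1 AND uw.language = $2

# With a word_type filter, " AND w.word_type = $3" is appended.
print(count_user_words_query("nl", "noun"))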
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/abstract.py
RENAMED

@@ -53,6 +53,15 @@ class AbstractBackend(ABC):
         and random ordering.
         """
 
+    @abstractmethod
+    async def count_user_words(
+        self,
+        user_id: str,
+        language: str,
+        word_type: str | None = None,
+    ) -> int:
+        """Return total user-word associations for the given user and language."""
+
     @abstractmethod
     async def add_user_word(
         self,
@@ -103,6 +112,22 @@ class AbstractBackend(ABC):
     ) -> None:
         """Insert event_id into the applied_events table."""
 
+    @abstractmethod
+    async def apply_score_delta_atomic(
+        self,
+        score_table: str,
+        events_table: str,
+        user_id: str,
+        event_id: str,
+        source_word_id: int,
+        delta: int,
+    ) -> bool:
+        """Atomically check-apply-mark a score delta in one transaction.
+
+        Returns True if the delta was applied, False if event_id was already applied.
+        The entire operation (check + increment + mark) runs in a single transaction.
+        """
+
     @abstractmethod
     async def create_tables(
         self,
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/backend/neon.py
RENAMED

@@ -1,8 +1,9 @@
-"""NeonBackend
+"""NeonBackend asyncpg implementation for Neon PostgreSQL."""
 
 import asyncpg
 
 from nl_processing.database.backend._neon_exercise import (
+    atomic_apply_delta,
     check_event,
     create_exercise_tables,
     get_scores,
@@ -14,6 +15,7 @@ from nl_processing.database.backend._queries import (
     CREATE_USER_WORDS,
     add_translation_link_query,
     add_word_query,
+    count_user_words_query,
     create_translations_table,
     create_words_table,
     get_user_words_query,
@@ -34,7 +36,6 @@ class NeonBackend(AbstractBackend):
         self._connection: asyncpg.Connection | None = None  # type: ignore[type-arg]
 
     async def _connect(self) -> asyncpg.Connection:  # type: ignore[type-arg]
-        """Return cached connection, creating it lazily on first call."""
         if self._connection is None:
             try:
                 self._connection = await asyncpg.connect(dsn=self._database_url)
@@ -43,13 +44,12 @@ class NeonBackend(AbstractBackend):
                 raise DatabaseError(str(exc)) from exc
             except OSError as exc:
                 raise DatabaseError(str(exc)) from exc
+        if self._connection is None:
+            raise DatabaseError("Database connection was not initialized")
         return self._connection
 
     async def create_tables(
-        self,
-        languages: list[str],
-        pairs: list[tuple[str, str]],
-        exercise_slugs: list[str],
+        self, languages: list[str], pairs: list[tuple[str, str]], exercise_slugs: list[str]
     ) -> None:
         conn = await self._connect()
         try:
@@ -78,11 +78,7 @@ class NeonBackend(AbstractBackend):
             return None
         return int(row["id"])
 
-    async def get_word(
-        self,
-        table: str,
-        normalized_form: str,
-    ) -> dict[str, str | int] | None:
+    async def get_word(self, table: str, normalized_form: str) -> dict[str, str | int] | None:
         conn = await self._connect()
         try:
             row = await conn.fetchrow(get_word_query(table), normalized_form)
@@ -96,24 +92,14 @@ class NeonBackend(AbstractBackend):
             "word_type": row["word_type"],
         }
 
-    async def add_translation_link(
-        self,
-        table: str,
-        source_id: int,
-        target_id: int,
-    ) -> None:
+    async def add_translation_link(self, table: str, source_id: int, target_id: int) -> None:
         conn = await self._connect()
         try:
             await conn.execute(add_translation_link_query(table), source_id, target_id)
         except asyncpg.PostgresError as exc:
             raise DatabaseError(str(exc)) from exc
 
-    async def add_user_word(
-        self,
-        user_id: str,
-        word_id: int,
-        language: str,
-    ) -> None:
+    async def add_user_word(self, user_id: str, word_id: int, language: str) -> None:
         conn = await self._connect()
         try:
             await conn.execute(ADD_USER_WORD, user_id, word_id, language)
@@ -144,6 +130,19 @@ class NeonBackend(AbstractBackend):
             raise DatabaseError(str(exc)) from exc
         return [dict(row) for row in rows]
 
+    async def count_user_words(self, user_id: str, language: str, word_type: str | None = None) -> int:
+        conn = await self._connect()
+        args: list[str] = [user_id, language]
+        if word_type is not None:
+            args.append(word_type)
+        try:
+            count = await conn.fetchval(count_user_words_query(language, word_type), *args)
+        except asyncpg.PostgresError as exc:
+            raise DatabaseError(str(exc)) from exc
+        if count is None:
+            return 0
+        return int(count)
+
     async def increment_user_exercise_score(
         self,
         table: str,
@@ -155,36 +154,29 @@ class NeonBackend(AbstractBackend):
         return await increment_score(conn, table, user_id, source_word_id, delta)
 
     async def get_user_exercise_scores(
-        self,
-        table: str,
-        user_id: str,
-        source_word_ids: list[int],
+        self, table: str, user_id: str, source_word_ids: list[int]
     ) -> list[dict[str, str | int]]:
         conn = await self._connect()
         return await get_scores(conn, table, user_id, source_word_ids)
 
-    async def check_event_applied(
-        self,
-        table: str,
-        event_id: str,
-    ) -> bool:
+    async def check_event_applied(self, table: str, event_id: str) -> bool:
         conn = await self._connect()
         return await check_event(conn, table, event_id)
 
-    async def mark_event_applied(
-        self,
-        table: str,
-        event_id: str,
-    ) -> None:
+    async def mark_event_applied(self, table: str, event_id: str) -> None:
         conn = await self._connect()
         await mark_event(conn, table, event_id)
 
+    async def apply_score_delta_atomic(
+        self, score_table: str, events_table: str,
+        user_id: str, event_id: str, source_word_id: int, delta: int,
+    ) -> bool:  # fmt: skip
+        conn = await self._connect()
+        return await atomic_apply_delta(conn, score_table, events_table, user_id, event_id, source_word_id, delta)
 
-def _infer_target_language(source_language: str) -> str:
-    """Infer the target language for translation lookups.
 
-
-    """
+def _infer_target_language(source_language: str) -> str:
+    """Infer the other language in the nl/ru pair."""
     if source_language == "nl":
         return "ru"
     return "nl"
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/exercise_progress.py
RENAMED

@@ -119,25 +119,21 @@ class ExerciseProgressStore:
     ) -> None:
         """Apply a score delta idempotently, guarded by event deduplication.
 
-        Validates exercise_type. Skips if event_id was already applied.
+        Validates exercise_type and delta. Skips if event_id was already applied.
+        The check-increment-mark operation is atomic (single transaction).
         """
         self._validate_exercise_type(exercise_type)
-
-
-
-        )
-        if already_applied:
-            return
+        if delta not in (1, -1):
+            msg = f"delta must be +1 or -1, got {delta}"
+            raise ValueError(msg)
         table = self._score_tables[exercise_type]
-        await self._backend.
-            table,
-            self.
-
-
-
-
-            self._applied_events_table,
-            event_id,
+        await self._backend.apply_score_delta_atomic(
+            score_table=table,
+            events_table=self._applied_events_table,
+            user_id=self._user_id,
+            event_id=event_id,
+            source_word_id=source_word_id,
+            delta=delta,
         )
 
     def _validate_exercise_type(self, exercise_type: str) -> None:
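
From the caller's side the tightened contract looks roughly like this. The store construction is abbreviated, the IDs are placeholders, and the constructor arguments simply mirror the ones DatabaseCacheService uses later in this diff:

import asyncio

from nl_processing.core.models import Language
from nl_processing.database.exercise_progress import ExerciseProgressStore


async def main() -> None:
    store = ExerciseProgressStore(
        user_id="user-123",
        source_language=Language.NL,
        target_language=Language.RU,
        exercise_types=["translate"],
    )
    try:
        # Any delta other than +1 / -1 is rejected before the database is touched.
        await store.apply_score_delta(
            event_id="event-0001",
            source_word_id=42,
            exercise_type="translate",
            delta=5,
        )
    except ValueError as exc:
        print(exc)  # "delta must be +1 or -1, got 5" (assuming "translate" is a configured exercise type)


asyncio.run(main())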
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/database/service.py
RENAMED

@@ -140,6 +140,19 @@ class DatabaseService:
                 language=self._target_language,
             )
             pairs.append(WordPair(source=source, target=target))
+        if limit is None and not random:
+            total_count = await self._backend.count_user_words(
+                self._user_id,
+                self._source_language.value,
+                word_type=word_type.value if word_type else None,
+            )
+            if total_count > len(pairs):
+                excluded_count = total_count - len(pairs)
+                _logger.warning(
+                    "%d of %d words excluded from get_words() due to missing translations",
+                    excluded_count,
+                    total_count,
+                )
         return pairs
 
     @classmethod
nl_processing-0.4.0/nl_processing/database_cache/_local_store_queries.py

@@ -0,0 +1,58 @@
+"""DDL and query constants for the local SQLite cache store."""
+
+DDL_CACHED_WORD_PAIRS = """
+    CREATE TABLE IF NOT EXISTS cached_word_pairs (
+        source_word_id INTEGER PRIMARY KEY,
+        source_normalized_form TEXT NOT NULL,
+        source_word_type TEXT NOT NULL,
+        target_word_id INTEGER NOT NULL,
+        target_normalized_form TEXT NOT NULL,
+        target_word_type TEXT NOT NULL
+    )"""
+
+DDL_CACHED_SCORES = """
+    CREATE TABLE IF NOT EXISTS cached_scores (
+        source_word_id INTEGER NOT NULL,
+        exercise_type TEXT NOT NULL,
+        score INTEGER NOT NULL DEFAULT 0,
+        updated_at TEXT NOT NULL,
+        PRIMARY KEY (source_word_id, exercise_type)
+    )"""
+
+DDL_PENDING_SCORE_EVENTS = """
+    CREATE TABLE IF NOT EXISTS pending_score_events (
+        event_id TEXT PRIMARY KEY,
+        source_word_id INTEGER NOT NULL,
+        exercise_type TEXT NOT NULL,
+        delta INTEGER NOT NULL,
+        created_at TEXT NOT NULL,
+        flushed_at TEXT,
+        last_error TEXT
+    )"""
+
+DDL_CACHE_METADATA = """
+    CREATE TABLE IF NOT EXISTS cache_metadata (
+        id INTEGER PRIMARY KEY DEFAULT 1,
+        exercise_types TEXT NOT NULL,
+        schema_version INTEGER NOT NULL DEFAULT 1,
+        last_refresh_started_at TEXT,
+        last_refresh_completed_at TEXT,
+        last_flush_completed_at TEXT,
+        last_error TEXT
+    )"""
+
+ALL_DDL = [DDL_CACHED_WORD_PAIRS, DDL_CACHED_SCORES, DDL_PENDING_SCORE_EVENTS, DDL_CACHE_METADATA]
+
+UPSERT_SCORE = (
+    "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
+    " ON CONFLICT(source_word_id, exercise_type) DO UPDATE SET score = score + ?, updated_at = ?"
+)
+
+INSERT_PENDING_EVENT = (
+    "INSERT INTO pending_score_events (event_id, source_word_id, exercise_type, delta, created_at)"
+    " VALUES (?, ?, ?, ?, ?)"
+)
+
+INSERT_WORD_PAIR = "INSERT INTO cached_word_pairs VALUES (?, ?, ?, ?, ?, ?)"
+
+INSERT_SCORE = "INSERT INTO cached_scores (source_word_id, exercise_type, score, updated_at) VALUES (?, ?, ?, ?)"
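
One non-obvious detail: UPSERT_SCORE takes six parameters, four for the INSERT plus the delta and timestamp again for the conflict branch. A small standalone check using the stdlib sqlite3 module (in-memory database, illustrative values):

import sqlite3
from datetime import UTC, datetime

from nl_processing.database_cache._local_store_queries import DDL_CACHED_SCORES, UPSERT_SCORE

conn = sqlite3.connect(":memory:")
conn.execute(DDL_CACHED_SCORES)

now = datetime.now(tz=UTC).isoformat()
# First execution inserts score=1; the repeat hits ON CONFLICT and adds the delta.
conn.execute(UPSERT_SCORE, (42, "translate", 1, now, 1, now))
conn.execute(UPSERT_SCORE, (42, "translate", 1, now, 1, now))
print(conn.execute("SELECT score FROM cached_scores").fetchone())  # (2,)
conn.close()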
nl_processing-0.4.0/nl_processing/database_cache/exceptions.py

@@ -0,0 +1,10 @@
+class CacheNotReadyError(Exception):
+    """Raised when cached data is requested before the first usable snapshot exists."""
+
+
+class CacheStorageError(Exception):
+    """Raised when the local SQLite cache file cannot be opened, read, or updated."""
+
+
+class CacheSyncError(Exception):
+    """Raised when an explicit refresh or flush operation fails synchronously."""
nl_processing-0.4.0/nl_processing/database_cache/local_store.py

@@ -0,0 +1,200 @@
+"""SQLite data-access layer for the local word-pair / score cache."""
+
+from datetime import UTC, datetime
+import json
+import sqlite3
+
+import aiosqlite
+
+from nl_processing.database_cache._local_store_queries import (
+    ALL_DDL,
+    INSERT_PENDING_EVENT,
+    INSERT_SCORE,
+    INSERT_WORD_PAIR,
+    UPSERT_SCORE,
+)
+from nl_processing.database_cache.exceptions import CacheStorageError
+
+
+def _now() -> str:
+    return datetime.now(tz=UTC).isoformat()
+
+
+class LocalStore:
+    """Async SQLite store for cached word pairs, scores, and pending events."""
+
+    def __init__(self, db_path: str) -> None:
+        self._db_path = db_path
+        self._db: aiosqlite.Connection | None = None
+
+    @property
+    def _conn(self) -> aiosqlite.Connection:
+        if self._db is None:
+            raise CacheStorageError("LocalStore is not open")
+        return self._db
+
+    async def open(self) -> None:
+        """Open the SQLite connection and create tables."""
+        try:
+            self._db = await aiosqlite.connect(self._db_path)
+            self._db.row_factory = aiosqlite.Row
+            await self._db.execute("PRAGMA journal_mode=WAL")
+            for ddl in ALL_DDL:
+                await self._db.execute(ddl)
+            await self._db.commit()
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def close(self) -> None:
+        """Close the SQLite connection."""
+        if self._db:
+            await self._db.close()
+            self._db = None
+
+    async def get_cached_word_pairs(
+        self,
+        word_type: str | None = None,
+        limit: int | None = None,
+        *,
+        random: bool = False,
+    ) -> list[dict[str, str | int]]:
+        """Query cached word pairs with optional filter, limit, and random ordering."""
+        sql = "SELECT * FROM cached_word_pairs"
+        params: list[str | int] = []
+        if word_type is not None:
+            sql += " WHERE source_word_type = ?"
+            params.append(word_type)
+        if random:
+            sql += " ORDER BY RANDOM()"
+        if limit is not None:
+            sql += " LIMIT ?"
+            params.append(limit)
+        return await self._fetch_all(sql, params)
+
+    async def get_cached_word_pairs_with_scores(self, exercise_types: list[str]) -> list[dict[str, str | int]]:
+        """Query word pairs and attach scores per exercise type (missing = 0)."""
+        try:
+            rows = await self._fetch_all("SELECT * FROM cached_word_pairs")
+            for row in rows:
+                for et in exercise_types:
+                    sc = await self._conn.execute(
+                        "SELECT score FROM cached_scores WHERE source_word_id=? AND exercise_type=?",
+                        (row["source_word_id"], et),
+                    )
+                    score_row = await sc.fetchone()
+                    row[f"score_{et}"] = int(score_row["score"]) if score_row else 0
+            return rows
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def get_pending_events(self) -> list[dict[str, str | int]]:
+        return await self._fetch_all("SELECT * FROM pending_score_events WHERE flushed_at IS NULL ORDER BY created_at")
+
+    async def get_pending_event_count(self) -> int:
+        try:
+            cur = await self._conn.execute("SELECT COUNT(*) FROM pending_score_events WHERE flushed_at IS NULL")
+            row = await cur.fetchone()
+            return int(row[0]) if row else 0
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def get_metadata(self) -> dict[str, str | int] | None:
+        try:
+            cur = await self._conn.execute("SELECT * FROM cache_metadata WHERE id = 1")
+            row = await cur.fetchone()
+            return dict(row) if row else None
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def has_snapshot(self) -> bool:
+        try:
+            cur = await self._conn.execute("SELECT 1 FROM cached_word_pairs LIMIT 1")
+            return (await cur.fetchone()) is not None
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def record_score_and_event(
+        self,
+        source_word_id: int,
+        exercise_type: str,
+        delta: int,
+        event_id: str,
+    ) -> None:
+        """Atomically upsert a cached score and insert a pending event."""
+        now = _now()
+        try:
+            await self._conn.execute(UPSERT_SCORE, (source_word_id, exercise_type, delta, now, delta, now))
+            await self._conn.execute(INSERT_PENDING_EVENT, (event_id, source_word_id, exercise_type, delta, now))
+            await self._conn.commit()
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def rebuild_snapshot(
+        self,
+        word_pairs: list[tuple[int, str, str, int, str, str]],
+        scores: dict[tuple[int, str], int],
+    ) -> None:
+        """Atomically replace cached word pairs and scores, then reapply pending events."""
+        now = _now()
+        try:
+            await self._conn.execute("DELETE FROM cached_word_pairs")
+            await self._conn.execute("DELETE FROM cached_scores")
+            for wp in word_pairs:
+                await self._conn.execute(INSERT_WORD_PAIR, wp)
+            for (wid, et), score in scores.items():
+                await self._conn.execute(INSERT_SCORE, (wid, et, score, now))
+            for evt in await self.get_pending_events():
+                await self._conn.execute(
+                    UPSERT_SCORE,
+                    (evt["source_word_id"], evt["exercise_type"], evt["delta"], now, evt["delta"], now),
+                )
+            await self._conn.commit()
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def mark_event_flushed(self, event_id: str) -> None:
+        await self._exec_commit("UPDATE pending_score_events SET flushed_at=? WHERE event_id=?", (_now(), event_id))
+
+    async def mark_event_failed(self, event_id: str, error: str) -> None:
+        await self._exec_commit("UPDATE pending_score_events SET last_error=? WHERE event_id=?", (error, event_id))
+
+    async def update_metadata(self, **fields: str | int | None) -> None:
+        if not fields:
+            return
+        set_clause = ", ".join(f"{k} = ?" for k in fields)
+        await self._exec_commit(
+            f"UPDATE cache_metadata SET {set_clause} WHERE id = 1",  # noqa: S608
+            tuple(fields.values()),
+        )
+
+    async def ensure_metadata(self, exercise_types: list[str]) -> None:
+        await self._exec_commit(
+            "INSERT OR REPLACE INTO cache_metadata (id, exercise_types, schema_version) VALUES (1, ?, 1)",
+            (json.dumps(exercise_types),),
+        )
+
+    async def get_source_word_id(self, normalized_form: str, word_type: str) -> int | None:
+        """Look up a source_word_id from cached_word_pairs."""
+        try:
+            cur = await self._conn.execute(
+                "SELECT source_word_id FROM cached_word_pairs WHERE source_normalized_form=? AND source_word_type=?",
+                (normalized_form, word_type),
+            )
+            row = await cur.fetchone()
+            return int(row["source_word_id"]) if row else None
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def _fetch_all(self, sql: str, params: list[str | int] | None = None) -> list[dict[str, str | int]]:
+        try:
+            cur = await self._conn.execute(sql, params or [])
+            return [dict(row) for row in await cur.fetchall()]
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
+
+    async def _exec_commit(self, sql: str, params: tuple[str | int | None, ...]) -> None:
+        try:
+            await self._conn.execute(sql, params)
+            await self._conn.commit()
+        except sqlite3.Error as exc:
+            raise CacheStorageError(str(exc)) from exc
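
A minimal end-to-end sketch of the new store (the database path, word data, and event id are illustrative; it only exercises the calls defined above):

import asyncio

from nl_processing.database_cache.local_store import LocalStore


async def main() -> None:
    store = LocalStore("/tmp/nl_cache_example.db")  # illustrative path
    await store.open()
    await store.ensure_metadata(["translate"])
    # Seed one cached pair, then record a local score change plus its pending event.
    await store.rebuild_snapshot(
        word_pairs=[(1, "huis", "noun", 10, "дом", "noun")],
        scores={(1, "translate"): 0},
    )
    await store.record_score_and_event(1, "translate", 1, "event-0001")
    print(await store.get_pending_event_count())  # 1
    print(await store.get_cached_word_pairs(word_type="noun"))
    await store.close()


asyncio.run(main())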
nl_processing-0.4.0/nl_processing/database_cache/models.py

@@ -0,0 +1,12 @@
+from datetime import datetime
+
+from pydantic import BaseModel
+
+
+class CacheStatus(BaseModel):
+    is_ready: bool
+    is_stale: bool
+    has_snapshot: bool
+    pending_events: int
+    last_refresh_completed_at: datetime | None
+    last_flush_completed_at: datetime | None
nl_processing-0.4.0/nl_processing/database_cache/service.py

@@ -0,0 +1,185 @@
+"""DatabaseCacheService — public API for the local SQLite cache layer."""
+
+import asyncio
+from datetime import UTC, datetime, timedelta
+import json
+import tempfile
+from uuid import uuid4
+
+from nl_processing.core.models import Language, PartOfSpeech, Word
+from nl_processing.database.exercise_progress import ExerciseProgressStore
+from nl_processing.database.models import ScoredWordPair, WordPair
+from nl_processing.database_cache.exceptions import CacheNotReadyError
+from nl_processing.database_cache.local_store import LocalStore
+from nl_processing.database_cache.logging import get_logger
+from nl_processing.database_cache.models import CacheStatus
+from nl_processing.database_cache.sync import CacheSyncer
+
+_log = get_logger("service")
+
+
+class DatabaseCacheService:
+    """Offline-first cache backed by a local SQLite database."""
+
+    def __init__(
+        self,
+        *,
+        user_id: str,
+        source_language: Language,
+        target_language: Language,
+        exercise_types: list[str],
+        cache_ttl: timedelta,
+        cache_dir: str | None = None,
+    ) -> None:
+        if not exercise_types:
+            msg = "exercise_types must be a non-empty list"
+            raise ValueError(msg)
+        self._user_id = user_id
+        self._source_language = source_language
+        self._target_language = target_language
+        self._exercise_types = list(exercise_types)
+        self._cache_ttl = cache_ttl
+        base = cache_dir or tempfile.gettempdir()
+        self._db_path = f"{base}/{user_id}_{source_language.value}_{target_language.value}.db"
+        self._initialized = False
+        self._local: LocalStore | None = None
+        self._syncer: CacheSyncer | None = None
+
+    async def init(self) -> CacheStatus:
+        """Open local store, bootstrap or refresh as needed, return status."""
+        progress_store = ExerciseProgressStore(
+            user_id=self._user_id,
+            source_language=self._source_language,
+            target_language=self._target_language,
+            exercise_types=self._exercise_types,
+        )
+        self._local = LocalStore(self._db_path)
+        await self._local.open()
+        self._syncer = CacheSyncer(self._local, progress_store)
+        await self._local.ensure_metadata(self._exercise_types)
+        meta = await self._local.get_metadata()
+        if meta and json.loads(str(meta["exercise_types"])) != self._exercise_types:
+            await self._local.ensure_metadata(self._exercise_types)
+            await self._syncer.refresh()
+        elif not await self._local.has_snapshot():
+            await self._syncer.refresh()
+        elif self._is_stale(meta):
+            asyncio.create_task(self._background_refresh())
+        self._initialized = True
+        return await self.get_status()
+
+    async def get_words(
+        self,
+        *,
+        word_type: str | None = None,
+        limit: int | None = None,
+        random: bool = False,
+    ) -> list[WordPair]:
+        """Return cached word pairs, optionally filtered."""
+        self._ensure_ready()
+        assert self._local is not None
+        rows = await self._local.get_cached_word_pairs(word_type=word_type, limit=limit, random=random)
+        return [self._row_to_word_pair(r) for r in rows]
+
+    async def get_word_pairs_with_scores(self) -> list[ScoredWordPair]:
+        """Return cached word pairs with exercise scores."""
+        self._ensure_ready()
+        assert self._local is not None
+        rows = await self._local.get_cached_word_pairs_with_scores(self._exercise_types)
+        result: list[ScoredWordPair] = []
+        for row in rows:
+            pair = self._row_to_word_pair(row)
+            scores = {et: int(row[f"score_{et}"]) for et in self._exercise_types}
+            result.append(ScoredWordPair(pair=pair, scores=scores, source_word_id=int(row["source_word_id"])))
+        return result
+
+    async def record_exercise_result(self, *, source_word: Word, exercise_type: str, delta: int) -> None:
+        """Record a score change locally and queue for remote flush."""
+        self._ensure_ready()
+        assert self._local is not None
+        if exercise_type not in self._exercise_types:
+            msg = f"Unknown exercise_type '{exercise_type}'; expected one of {sorted(self._exercise_types)}"
+            raise ValueError(msg)
+        if delta not in (1, -1):
+            msg = f"delta must be +1 or -1, got {delta}"
+            raise ValueError(msg)
+        wid = await self._local.get_source_word_id(source_word.normalized_form, source_word.word_type.value)
+        if wid is None:
+            msg = f"Word '{source_word.normalized_form}' not found in cache"
+            raise ValueError(msg)
+        await self._local.record_score_and_event(wid, exercise_type, delta, str(uuid4()))
+        asyncio.create_task(self._background_flush())
+
+    async def refresh(self) -> None:
+        """Trigger a full cache refresh from the remote database."""
+        assert self._syncer is not None
+        await self._syncer.refresh()
+
+    async def flush(self) -> None:
+        """Flush pending score events to the remote database."""
+        assert self._syncer is not None
+        await self._syncer.flush()
+
+    async def get_status(self) -> CacheStatus:
+        """Build current cache status from metadata and pending events."""
+        assert self._local is not None
+        meta = await self._local.get_metadata()
+        has_snap = await self._local.has_snapshot()
+        pending = await self._local.get_pending_event_count()
+        last_refresh = _parse_dt(meta, "last_refresh_completed_at") if meta else None
+        last_flush = _parse_dt(meta, "last_flush_completed_at") if meta else None
+        return CacheStatus(
+            is_ready=self._initialized and has_snap,
+            is_stale=self._is_stale(meta),
+            has_snapshot=has_snap,
+            pending_events=pending,
+            last_refresh_completed_at=last_refresh,
+            last_flush_completed_at=last_flush,
+        )
+
+    def _ensure_ready(self) -> None:
+        if not self._initialized or self._local is None:
+            raise CacheNotReadyError("Cache not initialized — call init() first")
+
+    def _is_stale(self, meta: dict[str, str | int] | None) -> bool:
+        if not meta:
+            return True
+        last_refresh = _parse_dt(meta, "last_refresh_completed_at")
+        if last_refresh is None:
+            return True
+        return datetime.now(tz=UTC) - last_refresh > self._cache_ttl
+
+    def _row_to_word_pair(self, row: dict[str, str | int]) -> WordPair:
+        return WordPair(
+            source=Word(
+                normalized_form=str(row["source_normalized_form"]),
+                word_type=PartOfSpeech(row["source_word_type"]),
+                language=self._source_language,
+            ),
+            target=Word(
+                normalized_form=str(row["target_normalized_form"]),
+                word_type=PartOfSpeech(row["target_word_type"]),
+                language=self._target_language,
+            ),
+        )
+
+    async def _background_refresh(self) -> None:
+        try:
+            assert self._syncer is not None
+            await self._syncer.refresh()
+        except Exception:
+            _log.exception("background refresh failed")
+
+    async def _background_flush(self) -> None:
+        try:
+            assert self._syncer is not None
+            await self._syncer.flush(skip_if_running=True)
+        except Exception:
+            _log.exception("background flush failed")
+
+
+def _parse_dt(meta: dict[str, str | int], key: str) -> datetime | None:
+    val = meta[key] if key in meta else None
+    if val is None:
+        return None
+    return datetime.fromisoformat(str(val))
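
The intended call pattern, as far as this diff shows it (the values below are placeholders, and init() still needs the remote Neon database reachable so the first snapshot can be pulled):

import asyncio
from datetime import timedelta

from nl_processing.core.models import Language
from nl_processing.database_cache.service import DatabaseCacheService


async def main() -> None:
    cache = DatabaseCacheService(
        user_id="user-123",
        source_language=Language.NL,
        target_language=Language.RU,
        exercise_types=["translate"],
        cache_ttl=timedelta(hours=12),
        cache_dir="/tmp",  # illustrative; defaults to tempfile.gettempdir()
    )
    status = await cache.init()
    print(status.is_ready, status.pending_events)
    pairs = await cache.get_words(limit=10, random=True)
    print(len(pairs))


asyncio.run(main())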
nl_processing-0.4.0/nl_processing/database_cache/sync.py

@@ -0,0 +1,82 @@
+"""Refresh / flush orchestration for the local cache."""
+
+import asyncio
+from datetime import UTC, datetime
+
+from nl_processing.database.exercise_progress import ExerciseProgressStore
+from nl_processing.database_cache.exceptions import CacheSyncError
+from nl_processing.database_cache.local_store import LocalStore
+from nl_processing.database_cache.logging import get_logger
+
+_log = get_logger("sync")
+
+
+class CacheSyncer:
+    """Coordinates full refresh from remote and flush of pending events back to remote."""
+
+    def __init__(self, local_store: LocalStore, progress_store: ExerciseProgressStore) -> None:
+        self._local = local_store
+        self._remote = progress_store
+        self._refresh_lock = asyncio.Lock()
+        self._flush_lock = asyncio.Lock()
+
+    async def refresh(self) -> None:
+        """Pull a full snapshot from the remote database and rebuild the local cache."""
+        if self._refresh_lock.locked():
+            return
+        async with self._refresh_lock:
+            now = datetime.now(tz=UTC).isoformat()
+            try:
+                await self._local.update_metadata(last_refresh_started_at=now)
+                scored_pairs = await self._remote.export_remote_snapshot()
+                word_pairs: list[tuple[int, str, str, int, str, str]] = [
+                    (
+                        sp.source_word_id,
+                        sp.pair.source.normalized_form,
+                        sp.pair.source.word_type.value,
+                        0,
+                        sp.pair.target.normalized_form,
+                        sp.pair.target.word_type.value,
+                    )
+                    for sp in scored_pairs
+                ]
+                scores: dict[tuple[int, str], int] = {}
+                for sp in scored_pairs:
+                    for exercise_type, score in sp.scores.items():
+                        scores[(sp.source_word_id, exercise_type)] = score
+                await self._local.rebuild_snapshot(word_pairs, scores)
+                await self._local.update_metadata(
+                    last_refresh_completed_at=datetime.now(tz=UTC).isoformat(),
+                )
+            except CacheSyncError:
+                raise
+            except Exception as exc:
+                _log.exception("refresh failed")
+                await self._local.update_metadata(last_error=str(exc))
+                raise CacheSyncError(str(exc)) from exc
+
+    async def flush(self, *, skip_if_running: bool = False) -> None:
+        """Push pending local score events to the remote database.
+
+        Args:
+            skip_if_running: If True, return immediately if another flush is already running.
+                If False (default), wait for any running flush to complete.
+        """
+        if skip_if_running and self._flush_lock.locked():
+            return
+        async with self._flush_lock:
+            events = await self._local.get_pending_events()
+            for evt in events:
+                eid = str(evt["event_id"])
+                try:
+                    await self._remote.apply_score_delta(
+                        event_id=eid,
+                        source_word_id=int(evt["source_word_id"]),
+                        exercise_type=str(evt["exercise_type"]),
+                        delta=int(evt["delta"]),
+                    )
+                    await self._local.mark_event_flushed(eid)
+                except Exception as exc:
+                    _log.warning("flush failed for event %s: %s", eid, exc)
+                    await self._local.mark_event_failed(eid, str(exc))
+            await self._local.update_metadata(last_flush_completed_at=datetime.now(tz=UTC).isoformat())
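
The skip_if_running flag in flush() mirrors a common asyncio pattern: peek at the lock before awaiting it so background triggers never queue up behind an in-flight flush. A stripped-down illustration of just that pattern (not package code):

import asyncio

lock = asyncio.Lock()


async def flush(*, skip_if_running: bool = False) -> str:
    if skip_if_running and lock.locked():
        return "skipped"          # background callers bail out immediately
    async with lock:
        await asyncio.sleep(0.1)  # stand-in for pushing pending events
        return "flushed"


async def main() -> None:
    first = asyncio.create_task(flush())
    await asyncio.sleep(0)                     # let the first flush grab the lock
    print(await flush(skip_if_running=True))   # "skipped"
    print(await first)                         # "flushed"


asyncio.run(main())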
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/generate_nl_prompt.py
RENAMED

@@ -6,7 +6,7 @@ Usage:
 This script:
 1. Generates synthetic test images and encodes real photos
 2. Encodes them to base64
-3. Builds a ChatPromptTemplate with
+3. Builds a ChatPromptTemplate with 7 few-shot examples (HumanMessage + AIMessage + ToolMessage triplets)
 4. Serializes with dumpd() and saves to nl.json
 
 The script is the source of truth — nl.json is the generated artifact.
@@ -92,6 +92,9 @@ EXAMPLE_5_EXPECTED = ""
 EXAMPLE_6_TEXT = "Please take your shoes off before entering the house"
 EXAMPLE_6_EXPECTED = ""
 
+EXAMPLE_7_TEXT = "Remember to bring your umbrella tomorrow"
+EXAMPLE_7_EXPECTED = ""
+
 OUTPUT_PATH = Path(__file__).parent / "nl.json"
 
 
@@ -128,13 +131,14 @@ def _make_example_ai(expected_text: str, call_id: str) -> AIMessage:
 
 
 def build_prompt() -> ChatPromptTemplate:
-    """Build the Dutch extraction prompt with
+    """Build the Dutch extraction prompt with 7 few-shot examples."""
     img1 = _generate_image_b64(EXAMPLE_1_TEXT)
     img2 = _generate_image_b64(EXAMPLE_2_TEXT)
     img3 = _encode_existing_image_b64(EXAMPLE_3_IMAGE)
     img4 = _encode_existing_image_b64(EXAMPLE_4_IMAGE)
     img5 = _generate_image_b64(EXAMPLE_5_TEXT)
     img6 = _generate_image_b64(EXAMPLE_6_TEXT)
+    img7 = _generate_image_b64(EXAMPLE_7_TEXT)
 
     return ChatPromptTemplate.from_messages([
         SystemMessage(content=SYSTEM_INSTRUCTION),
@@ -156,6 +160,9 @@ def build_prompt() -> ChatPromptTemplate:
         _make_example_human(img6),
         _make_example_ai(EXAMPLE_6_EXPECTED, "call_example_6"),
         ToolMessage(content=EXAMPLE_6_EXPECTED, tool_call_id="call_example_6"),
+        _make_example_human(img7),
+        _make_example_ai(EXAMPLE_7_EXPECTED, "call_example_7"),
+        ToolMessage(content=EXAMPLE_7_EXPECTED, tool_call_id="call_example_7"),
         MessagesPlaceholder(variable_name="images"),
     ])
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/prompts/nl.json
RENAMED
|
@@ -398,6 +398,68 @@
       "status": "success"
     }
   },
+  {
+    "lc": 1,
+    "type": "constructor",
+    "id": [
+      "langchain",
+      "schema",
+      "messages",
+      "HumanMessage"
+    ],
+    "kwargs": {
+      "content": [
+        {
+          "type": "image_url",
+          "image_url": {
"url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAyAAAADICAIAAACf7RJNAAAXuklEQVR4Ae3BAYor14IFwcz9LzoHLggk1KXu51/tsfGJsGJmZmZm7mPFzMzMzNzHipmZmZm5jxUzMzMzcx8rZmZmZuY+VszMzMzMfayYmZmZmftYMTMzMzP3sWJmZmZm7mPFzMzMzNzHipmZmZm5jxUzMzMzcx8rZmZmZuY+VszMf4nKUTEzM7/Aipuo/EDF/D9RK/5L1Ip/NrXib6HypGLm30Ot+FdRKy6oFf9IasX8JVbcQeUPVczfSOWo+G9QOSr+qVSOit+n8qRi5l9C5aj4l1A5Kt6oHBX/MCpHxfw5K+6g8ucq5u+iclT8N6gcFf9UKkfF71N5UjHzL6FyVPxLqBwVb1SOin8YlaNi/pwVd1A5Kj5SeaiYv4vKUfHfoHJU/FOpHBV/C5WjYubfQ+Wo+JdQOSreqBwV/zAqR8X8OSvuoHJUfEfloWL+FipHxX+DylHxT6VyVMzMNZWj4l9C5ah4o3JU/MOoHBXz56y4g8pR8QMqR8X8LVSOiv8GlaPin0rlqJiZaypHxb+EylHxRuWo+IdROSrmz1lxB5Wj4mdUjor5fSpHxX+DylHxT6VyVMzMNZWj4l9C5ah4o3JU/MOoHBXz56y4g8pR8TMqR8X8PpWj4r9B5aj4p1I5KmbmmspR8S+hclS8UTkq/mFUjor5c1bcQeWo+BmVo+KayquK76gVDyqvKt6ovKr4EypPKr6jVjxReVLxFZUnFR+pHBWHypOKP6TyquI7asWDypOK/43KhYrvqLyq+DUqRwWovKr4AbXiQeVJxRO14oJa8aDyquJPqDypeFArbqJW/IxacU3lVcUPqBXfUSu+olY8UXlScQe14jtqxQW14onKk4qvqDyp+EjlqACVVxU/oFY8qDypuKbypOIjlaPijcpR8R2VJxWHWnEflQsV31F5UvEDasUTlScVX1F5VfEnVJ5U/IBa8UTliRV3UDkqfkDlqLigcq3igspRqVyrOFSuVXxH5ULFBZWjAlQuVDyoXKi4oHJUKhcqfkDlWsUFlaNS+UrFX6XyUcUFlWsVv0DlByquqRyVylcqDpWj4o3KUalcq/iOyoVK5aj4n6kcFd9ROSreqFyruKZyVFxTOSpeqRwVoPKViv+NylFxTeWoeKNyVIDKhYoHlQsVF1R+oOKaylGpfKXijcqFigsqR8UblaPimsp3Ku6g8lHFBZULFddUjgpQuVDxoHKt4jsqFyquqRwVoPLGijuoHBXfUXmo+IrKdyq+ovJjlcp3Kq6pfFTxFZWjUvmoAlQ+qviKys9UfKTynYqvqHxU8b9RuVBxQeU7FXdT+ZmKCyofVTyoHBVvVH6s4prKz1T8z1QeKq6pPFS8UvlOxQWVo+KaylHxSuWoVL5S8T9TOSquqRwVb1SOSuWjClD5qOIrKj9TcUHlo4o3Kh9VfEXlqHijclRcUPmBipuoXKi4oPJRxQWVo1L5qAJUvlNxTeWjigsqR6XyFSvuoHJUXFN5UvEVlScVDypPKt6ovKl4ULlQ8aDyUHFB5aHiicpDxRuVNxUPKg+VykPFE5WHijcqryoeVJ5UXFB5UvGg8qTijcqbil+gclR8pPKk4kHlScWtVF5VPKg8qfiKypuKr6gcFW9U3lQ8qDxUXFB5qHii8qriDipHxTWVo+KVypOKB5UnFV9ROSquqRwVr1TeVNxN5ai4pnJUvFF5U/Gg8lCpPFQ8qDxUfEXlVcWDypOKr6i8qbim8lDxoPKk4o3KUfFG5aj4ispDxYPKq4pbqRwV31F5qHii8lDxFZU3FU9ULlQ8qDxUXFB5qHhQeVLxFZU3FU+suIPKH6r4ispDxVdUjoo3Kk8q3qi8qnijclR8ReWh4isqR8UrlVcVb1ReVbxROSreqDypeKPyUPFG5aHiKypHxRuVJxW/RuWouKbyUPEVlYeK+6g8VHxF5aHijcqTimsqR8UblScVb1SOiq+oPFS8UXlScQeVo+KCykPFE5WHiq+oPFS8UTkqrqkcFa9UXlX8ApWj4prKUfFG5UnFG5U3FW9Ujoo3Kg8VX1F5qHij8qTimsqTijcqDxWvVI6KNypHxSuVJxVvVB4qbqVyVHyk8lDxRuVJxSuVVxVvVF5VvFE5Kt6oPKl4o/Kk4pXKq4pXVtxB5Q9VfEXloeKCylHxSuWh4isqTyouqBwVr1QeKi6oHBWvVJ5UfEXlScVXVI6KNyoPFRdUHipeqTxUXFA5Kl6pPFT8JpWj4prKUXFB5aHiPipHxQWVh4o3Kg8VH6kcFW9UHiouqBwVr1QeKi6oPFTcROWo+IrKUfFK5ai4oPJQ8UblqLimclS8UnlS8TtUjoprKkfFG5UnFV9ReVLxFZWj4o3KQ8UFlaPijcqTimsqDxUXVI6KVypHxRuVo+KVykPFBZWj4lYqR8U1lYeKCyoPFa9UnlR8ReVJxVdUHipeqTxUXFB5qHil8qTijRV3UPmBio9UHiquqRwVr1SOimsqR8U1laPilcpR8ZHKUfFE5aHigspDxQWVh4pXKkfFRypHxROVh4prKkfFK5WHit+kclRcUHmouKbyUHETlaPimspDxSuVh4qPVI6KNypHxTWVo+KVylHxkcpRcROVo+IrKkfFE5WHimsqDxWvVI6KaypHxSuVh4pfo3JUXFM5Kt6oPFRcUHmouKByVLxROSquqTxUvFJ5qPhI5aj4SOWoeKJyVLxROSpeqRwV11SOilupHBXXVI6Kj1SOilcqDxUXVB4qrqkcFa9UjoqPVI6KVyoPFV+x4g4qR8UblaPiI5U/VPFK5ai4pnJUXFM5Kl6p/KGKJypHxTWVo+IjlaPilcpR8ZHKUfFE5Q9VvFI5Kn6ZylFxQeWo+I7KUXETlaPiI5Wj4pXKUfEdlaPijcpRcU3lqHilclR8pHJU3ETloeKVylHxSuWo+I7KUfFK5ai4pnJUvFJ5qPg1KkfFNZWj4o3KUXFN5ai4pnJUvFE5Kj5SOSpeqRwVH6n8uYonKkfFG5Wj4onKQ8U1laPiVipHxTWVo+IjlYeKJypHxUcqR8U1laPiicpDxUcqDxVPVB4qvmLFHVSOijcqDxXXVP5QxSuVo+KaylFxTeWoeKLy5yqeqBwV11SOio9UjopXKkfFRyoPFQ8qf6jilcpR8ctUjooLKkfFd1SOipuoHBUfqRwVr1SOiu+oHBVvVI6KaypHxROVh4qPVI6K+6gcFa9UjopXKkfFd1SOilcqR8U1laPilcpDxa9ROSquqRwVb1SOimsqR8U1laPijcpR8ZHKUfFK5aj4SOXPVTxROSreqBwVT1SOio9UjopbqRwVF1QeKr6jclQ8UTkqPlI5Kq6pHBVPVI6KH1A5Kp6oHBUXrLiDylHxFZWj4prKn6h4o3JUXFM5Kq6pHBVPVP5QxSuVo+KaylHxkcpR8UrlqPiOylHxoPInKt6oHBW/TOWouKByVHxH5ai4icpR8ZHKUfFK5aj4jspR8UblqLimclQ8U
TkqvqNyVNxH5aHiQeWh4pXKUfEdlaPilcpRcU3lqHilclT8JpWj4prKUfFG5ai4pnJUXFM5Kt6oHBUfqRwVr1SOio9U/lDFK5Wj4o3KUfFE5aj4SOWouJXKUXFB5aj4AZWj4onKUfGRylFxTeWoeKJyVPyAylHxROWouGDFHVSOiq+oPFRcUDkq/iqVo+KaylFxTeWoeKJyVPxVKkfFNZWj4iOVo+KVylHxHZWj4kHlqPirVI6KX6ZyVFxQOSq+o3JU3ETlqPhI5ah4pXJUfEflqHijclRcUzkqnqgcFd9ROSpupXJUPKgcFW9UjorvqBwVr1SOimsqR8UrlaPiN6kcFddUjoo3KkfFNZWj4prKUfFG5aj4SOWoeKVyVHykclT8VSpHxRuVo+KJylHxkcpRcSuVo+KCylHxAypHxROVo+IjlaPimspR8UTlqPgBlaPiicpRccGKO6gcFRdUHiq+onJU/FUqR8U1laPimspR8UTloeIvUTkqrqkcFR+pHBWvVI6Kj1QeKh5Ujoq/SuWo+GUqR8UFlaPiOypHxU1UjoqPVI6KVypHxXdUjoo3KkfFNZWj4onKQ8VHKkfFrVSOikPloeKNylHxHZWj4pXKUXFN5ah4pXJU/CaVo+KaylHxRuWouKZyVFxTOSreqBwVH6kcFa9UjoqPVI6Kv0rlqHijclQ8UTkqPlI5Km6lclRcUHmo+I7KUfFE5aj4SOWouKZyVDxROSp+QOWoeKJyVFyw4g4qR8U1laPiKypHxV+lclRcUzkqrqkcFa9Ujoq/ROWouKZyVHykclS8UjkqPlI5Kp6oHBV/lcpR8ctUjooLKkfFd1SOipuoHBUfqRwVr1SOiu+oHBVvVI6KaypHxSuVo+IjlaPibipHBagcFV9ROSq+o3JUvFI5Kq6pHBWvVI6K36RyVFxTOSreqBwV11SOimsqR8UblaPiI5Wj4pXKUfEdlaPiL1E5Kt6oHBVPVB4qrqkcFbdSOSquqRwVH6k8VDxROSo+UjkqrqkcFa9UjoqPVI6KVypHxQUr7qByVFxTeah4o/JQ8ZFa8UblqLimclRcUzkqXqkcFd9RK16pHBXXVI6Kj1SOilcqR8VHKkfFE5WHio/UijcqR8UvUzkqLqg8VFxTeai4icpRcU3loeKVylHxHZWj4o3KUXFN5ah4pXJUfKRyVNxN5agAlaPiKyoPFddUHipeqRwVF1QeKl6pHBW/SeWouKZyVLxROSquqRwV11SOijcqR8U1lYeKVypHxXdUjorvqBWvVI6KNypHxSuVo+KaylFxK5Wj4prKUfGRylHxSuWo+EjlqLimclS8UjkqPlI5Kl6pHBUXrLiDylHxkcpDxRuVo+KaylHxSuWouKZyVFxTOSpeqTxUXFM5Kp6oHBXXVI6Kj1SOilcqR8U1lYeKVypHxTWVo+KVylHxy1SOimsqR8U1laPiPipHxTWVo+KNylHxHZWj4o3KUXFN5ah4pfJQcU3lqLibypuKaypHxTWVo+KNylHxFZUnFa9UjorfpPJQ8RWVh4o3KkfFNZWj4prKUfFG5ai4pnJUvFE5Kr6jclR8pHJUPFE5Kt6oHBWvVI6KaypHxa1UHiouqDxUXFM5Kl6pHBUfqRwV11SOilcqR8VHKkfFK5Wj4oIVd1A5Kj5Seah4o/JQ8RWVh4pXKkfFNZWj4prKUfFG5aHiKyoPFU9UjoprKkfFRypHxSuVh4qvqDxUvFF5qPiKykPFK5Wj4pepHBXXVB4qvqLyUHEflYeKr6g8VLxROSq+o3JUvFE5Kq6pHBVvVB4qvqLyUPELVF5VXFN5qPiKykPFG5Wj4o3Kq4pXKkfFL1M5Kt6oPKl4o3JUXFM5Kq6pHBVvVB4qvqLyUPFG5aj4jspDxQWVo+KVylHxRuWoeKXyUPEVlYeKu6kcFddUHiq+ovJQ8UrlqPhI5ai4pnJUvFJ5qLig8lDxSuWouGDFHVSOiu+oPFS8UXlS8UTloeKNylFxTeWouKZyVHxF5aHilcpDxSuVo+KaylHxkcpR8UrlVcUTlYeKCypPKp6oPFS8UTkqfpnKQ8U1lScVT1QeKm6l8qriicpDxVdUjorvqBwVb1SOimsqR8VXVB4qXqk8qfgFKq8qPlJ5UvFE5aHiKyoPFU9U3lS8UjkqfpnKQ8UTlVcVb1SOimsqR8U1laPijcqriicqDxVfUTkqfkDlScUTlScVr1SOijcqR8UblScVT1SeVNxN5aHimspDxSuVh4o3KkfFRypHxTWVo+KNypOKJypPKt6oHBUXrLiDylHxAypHxVdUvlPxFZWj4prKUXFN5aj4isoPVLxROSquqRwVH6kcFa9Ufqzimsp3Kr6iclT8PpU3FW9UvlNxN5WfqbigclR8R+WoeKNyVFxTOSq+ovJjFb9D5aHiB1S+U3FN5ccqXqkcFb9M5ccq3qgcFddUjoprKkfFG5WfqbigclT8jMoPVLxROSreqBwVX1H5mYq7qbypeKPyAxVfUTkqPlI5Kq6pHBVfUfmBiq+oHBUXrLiDylHxAyoPFV9R+ajiKypHxTWVo+KaylFxQeWjiq+oHBXXVI6Kj1SOilcqP1PxHZWPKr6iclT8PpWvVLxS+ajiF6j8QMU1laPiOypHxRuVo+KaylFxQeVnKn6HykPFD6h8VPGRykeVylHxSuWo+H0qP1PxRuWouKZyVFxTOSreqPxMxQWVo+LHVD6q+IrKUfFG5ai4oPIDFb9A5U3FG5WPKi6oHBUfqRwV11SOigsqH1VcUDkqLlhxE7Xix1Sg4iOVVxXfUSu+o1Z8R634AZUnFd9RK76jVvyAWvEVteJQeVXxh1ReVXxHrfgbqTypuKbyquI3qRWHyquKH1ArfkatuKBWfEet+AGVJxWHylHxO1SOij+k8qrix1ReVTxRK76iVvyNVF5VPKgVF9SK76gV31ErLqgVh8qrih9QK/6cyquK76gVF9SK76i8qjjUit+k8lDxkcqTih9QK35ArfiOWvEdlVcVP6BWXLNiZub/g8pR8QtUjoqZmb+RFTMz/x9UjopfoHJUzMz8jayYmfnbqRwVv0DlqJiZ+XtZMTPzt1M5Ku6m8lAxM/P3smJm5neoFW9UHipupfKkYmbm72XFzMwvUPlOxU1U3lTMzPztrJiZ+R0q1ypuovKmYmbm/4MVMzO/RuVNxd1UHipmZv7/WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbm
PlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPlbMzMzMzH2smJmZmZn7WDEzMzMz97FiZmZmZu5jxczMzMzcx4qZmZmZuY8VMzMzM3MfK2ZmZmbmPv8HPt/pW2VCUvMAAAAASUVORK5CYII="
+          }
+        }
+      ],
+      "type": "human"
+    }
+  },
+  {
+    "lc": 1,
+    "type": "constructor",
+    "id": [
+      "langchain",
+      "schema",
+      "messages",
+      "AIMessage"
+    ],
+    "kwargs": {
+      "content": "",
+      "type": "ai",
+      "tool_calls": [
+        {
+          "name": "ExtractedText",
+          "args": {
+            "text": ""
+          },
+          "id": "call_example_7",
+          "type": "tool_call"
+        }
+      ],
+      "invalid_tool_calls": []
+    }
+  },
+  {
+    "lc": 1,
+    "type": "constructor",
+    "id": [
+      "langchain",
+      "schema",
+      "messages",
+      "ToolMessage"
+    ],
+    "kwargs": {
+      "content": "",
+      "type": "tool",
+      "tool_call_id": "call_example_7",
+      "status": "success"
+    }
+  },
   {
     "lc": 1,
     "type": "constructor",
nl_processing-0.4.0/nl_processing/translate_word/__init__.py
File without changes
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/PKG-INFO
RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nl_processing
-Version: 0.3.0
+Version: 0.4.0
 Summary: Natural language processing playground
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
@@ -9,6 +9,7 @@ Requires-Dist: langchain<1,>=0.3
 Requires-Dist: langchain-openai<1,>=0.3
 Requires-Dist: opencv-python<5,>=4.10
 Requires-Dist: asyncpg<1,>=0.30
+Requires-Dist: aiosqlite<1,>=0.20
 
 # nl_processing
 
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing.egg-info/SOURCES.txt
RENAMED

@@ -12,7 +12,6 @@ nl_processing/core/models.py
 nl_processing/core/prompts.py
 nl_processing/core/scripts/prompt_author.py
 nl_processing/database/__init__.py
-nl_processing/database/cached_service.py
 nl_processing/database/exceptions.py
 nl_processing/database/exercise_progress.py
 nl_processing/database/logging.py
@@ -24,6 +23,14 @@ nl_processing/database/backend/_neon_exercise.py
 nl_processing/database/backend/_queries.py
 nl_processing/database/backend/abstract.py
 nl_processing/database/backend/neon.py
+nl_processing/database_cache/__init__.py
+nl_processing/database_cache/_local_store_queries.py
+nl_processing/database_cache/exceptions.py
+nl_processing/database_cache/local_store.py
+nl_processing/database_cache/logging.py
+nl_processing/database_cache/models.py
+nl_processing/database_cache/service.py
+nl_processing/database_cache/sync.py
 nl_processing/extract_text_from_image/__init__.py
 nl_processing/extract_text_from_image/benchmark.py
 nl_processing/extract_text_from_image/image_encoding.py
{nl_processing-0.3.0 → nl_processing-0.4.0}/pyproject.toml
RENAMED

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "nl_processing"
-version = "0.3.0"
+version = "0.4.0"
 description = "Natural language processing playground"
 readme = "README.md"
 requires-python = ">=3.12"
@@ -14,6 +14,7 @@ dependencies = [
     "langchain-openai>=0.3,<1",
     "opencv-python>=4.10,<5",
     "asyncpg>=0.30,<1",
+    "aiosqlite>=0.20,<1",
 ]
 
 [tool.setuptools.packages.find]
nl_processing-0.3.0/nl_processing/database/cached_service.py

@@ -1,82 +0,0 @@
-"""CachedDatabaseService — wraps DatabaseService with in-memory LRU cache.
-
-.. deprecated::
-    Legacy prototype helper; superseded by planned database_cache module.
-"""
-
-from nl_processing.core.models import Language, PartOfSpeech, Word
-from nl_processing.database.models import AddWordsResult, WordPair
-from nl_processing.database.service import DatabaseService
-
-
-class CachedDatabaseService:
-    """Wraps DatabaseService with an in-memory LRU cache for get_words.
-
-    .. deprecated::
-        Legacy prototype helper; superseded by planned database_cache module.
-    """
-
-    def __init__(
-        self,
-        *,
-        user_id: str,
-        source_language: Language = Language.NL,
-        target_language: Language = Language.RU,
-        cache_max_size: int = 128,
-    ) -> None:
-        self._inner = DatabaseService(
-            user_id=user_id,
-            source_language=source_language,
-            target_language=target_language,
-        )
-        self._cache: dict[tuple[str | None, int | None, bool], list[WordPair]] = {}
-        self._cache_max_size = cache_max_size
-        self._cache_order: list[tuple[str | None, int | None, bool]] = []
-
-    async def add_words(self, words: list[Word]) -> AddWordsResult:
-        """Delegate to inner service and clear the cache."""
-        result = await self._inner.add_words(words)
-        self._cache.clear()
-        self._cache_order.clear()
-        return result
-
-    async def get_words(
-        self,
-        *,
-        word_type: PartOfSpeech | None = None,
-        limit: int | None = None,
-        random: bool = False,
-    ) -> list[WordPair]:
-        """Return word pairs, serving from cache when possible.
-
-        Random queries and zero-size caches bypass the cache entirely.
-        """
-        if random or self._cache_max_size <= 0:
-            return await self._inner.get_words(
-                word_type=word_type,
-                limit=limit,
-                random=random,
-            )
-
-        key = (word_type.value if word_type else None, limit, False)
-        if key in self._cache:
-            self._cache_order.remove(key)
-            self._cache_order.append(key)
-            return self._cache[key]
-
-        result = await self._inner.get_words(
-            word_type=word_type,
-            limit=limit,
-            random=random,
-        )
-        self._cache[key] = result
-        self._cache_order.append(key)
-        while len(self._cache_order) > self._cache_max_size:
-            oldest = self._cache_order.pop(0)
-            self._cache.pop(oldest, None)
-        return result
-
-    @classmethod
-    async def create_tables(cls) -> None:
-        """Delegate to DatabaseService.create_tables."""
-        await DatabaseService.create_tables()
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/__init__.py
RENAMED
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/benchmark.py
RENAMED
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/image_encoding.py
RENAMED
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_text_from_image/service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/prompts/nl.json
RENAMED
|
File without changes
|
{nl_processing-0.3.0 → nl_processing-0.4.0}/nl_processing/extract_words_from_text/service.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|