codespine 1.0.5.tar.gz → 1.0.6.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-1.0.5 → codespine-1.0.6}/PKG-INFO +1 -1
- {codespine-1.0.5 → codespine-1.0.6}/codespine/__init__.py +1 -1
- {codespine-1.0.5 → codespine-1.0.6}/codespine/cli.py +75 -11
- codespine-1.0.6/codespine/db/_cypher_compat.py +523 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/indexer/call_resolver.py +11 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/indexer/engine.py +44 -8
- {codespine-1.0.5 → codespine-1.0.6}/codespine/sharding/store.py +9 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-1.0.5 → codespine-1.0.6}/pyproject.toml +1 -1
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_cypher_compat.py +168 -0
- codespine-1.0.5/codespine/db/_cypher_compat.py +0 -351
- {codespine-1.0.5 → codespine-1.0.6}/LICENSE +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/README.md +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/analysis/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/analysis/community.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/analysis/context.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/analysis/coupling.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/analysis/crossmodule.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/analysis/deadcode.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/analysis/flow.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/analysis/impact.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/cache/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/cache/result_cache.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/config.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/db/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/db/duckdb_store.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/db/schema.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/db/store.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/diff/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/diff/branch_diff.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/guide.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/indexer/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/indexer/di_resolver.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/indexer/java_parser.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/mcp/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/mcp/server.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/noise/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/noise/blocklist.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/overlay/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/overlay/git_state.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/overlay/merge.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/overlay/store.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/search/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/search/bm25.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/search/fuzzy.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/search/hybrid.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/search/rrf.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/search/vector.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/sharding/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/sharding/router.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/watch/__init__.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/watch/git_hook.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine/watch/watcher.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine.egg-info/requires.txt +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/codespine.egg-info/top_level.txt +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/gindex.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/setup.cfg +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_call_resolver.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_community_detection.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_deadcode.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_duckdb_store.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_index_and_hybrid.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_java_parser.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_multimodule_index.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_overlay.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_result_cache.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_search_ranking.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_sharding.py +0 -0
- {codespine-1.0.5 → codespine-1.0.6}/tests/test_store_recovery.py +0 -0
{codespine-1.0.5 → codespine-1.0.6}/codespine/cli.py

@@ -192,6 +192,21 @@ def _index_shard_group(
             with output_lock:
                 _phase(f"{prefix}Tracing calls...", "starting...")
             return
+        if event == "resolve_calls_heartbeat":
+            # Fires every 2 s from a daemon thread so the spinner stays
+            # alive even when the resolver produces no new edges.
+            scanned = int(payload.get("scanned", 0))
+            edges = int(payload.get("edges", 0))
+            elapsed_s = float(payload.get("elapsed", 0.0))
+            if not parallel:
+                click.echo(
+                    f"\r{_spinner_char()} {prefix}Tracing calls... "
+                    f"{edges:>6} resolved / {scanned} scanned {elapsed_s:.1f}s ",
+                    nl=False,
+                )
+            call_state["shown"] = True
+            call_state["last_ts"] = now
+            return
         if event == "resolve_calls_progress":
             call_state["count"] = int(payload.get("calls_resolved", 0))
             if (now - call_state["last_ts"]) >= 0.25:
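The hunk above only consumes `resolve_calls_heartbeat` events; the emitting side lives in codespine/indexer/engine.py (also changed in this release, hunk not shown here). A minimal sketch of how such a 2-second daemon-thread heartbeat can be produced; `emit` and `get_stats` are hypothetical stand-ins, not names from the package:

    import threading
    import time

    def start_heartbeat(emit, get_stats, interval: float = 2.0) -> threading.Event:
        # emit(event, payload) stands in for the engine's progress callback,
        # get_stats() for its (scanned, edges) counters.
        stop = threading.Event()
        started = time.monotonic()

        def _beat() -> None:
            # wait() returns False on timeout, True once stop is set.
            while not stop.wait(interval):
                scanned, edges = get_stats()
                emit("resolve_calls_heartbeat", {
                    "scanned": scanned,
                    "edges": edges,
                    "elapsed": time.monotonic() - started,
                })

        threading.Thread(target=_beat, daemon=True).start()
        return stop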
@@ -345,6 +360,37 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
     # For single-project analysis this is transparent — shard() always
     # returns a GraphStore pointing to the correct shard path.
     sg = ShardedGraphStore(read_only=False)
+
+    # ── SIGINT handler: flush partial index on Ctrl+C ────────────────────
+    # The handler captures `sg` by closure. On interrupt it snapshots all
+    # open shards so `codespine stats` and MCP see the partial result, then
+    # calls os._exit(130) to bypass Python cleanup (safe for CLI process).
+    # A second Ctrl+C hard-exits immediately.
+    _sigint_pressed: list[bool] = [False]
+    _old_sigint_handler = signal.getsignal(signal.SIGINT)
+
+    def _sigint_flush(signum: int, frame: object) -> None:  # noqa: ARG001
+        if _sigint_pressed[0]:
+            os._exit(130)
+        _sigint_pressed[0] = True
+        # Restore default handler so a second Ctrl+C exits immediately.
+        signal.signal(signal.SIGINT, signal.default_int_handler)
+        click.secho(
+            "\n\n⚠ Interrupted — flushing partial index to read replica…",
+            fg="yellow",
+        )
+        try:
+            sg.snapshot_all(background=False)
+            click.secho(
+                "✓ Partial index saved. Run 'codespine stats' to see what was indexed.",
+                fg="yellow",
+            )
+        except Exception:  # noqa: BLE001
+            pass
+        os._exit(130)
+
+    signal.signal(signal.SIGINT, _sigint_flush)
+
     # The indexer is initialised per-module below with the right shard store.
     # We keep a single ShardedGraphStore to fan-out cross-module linking later.
 
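The double-Ctrl+C idiom above is self-contained and reusable: first interrupt flushes and exits 130, second interrupt hard-exits at once. A stripped-down sketch of the same pattern, with `flush` standing in for `sg.snapshot_all(background=False)` (illustrative only, not package code):

    import os
    import signal

    def install_flush_on_interrupt(flush):
        # First Ctrl+C: flush partial state, then exit with 130 (SIGINT).
        # Second Ctrl+C while flushing: hard-exit immediately.
        pressed = [False]
        previous = signal.getsignal(signal.SIGINT)

        def _handler(signum, frame):
            if pressed[0]:
                os._exit(130)
            pressed[0] = True
            # From here on, another Ctrl+C raises KeyboardInterrupt normally.
            signal.signal(signal.SIGINT, signal.default_int_handler)
            try:
                flush()
            except Exception:
                pass
            os._exit(130)

        signal.signal(signal.SIGINT, _handler)
        return previous  # caller restores this after a clean finish

The returned previous handler matches what the next hunk does: after a clean run the CLI puts the original SIGINT handler back.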
@@ -594,6 +640,9 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
     sg.snapshot_all(background=False)
     _finish_phase(snap_label, "MCP will reload automatically")
 
+    # Restore original SIGINT handler now that we've finished cleanly.
+    signal.signal(signal.SIGINT, _old_sigint_handler)
+
 
 @main.command()
 @click.argument("query")
@@ -741,15 +790,27 @@ def stats(as_json: bool, show_shards: bool) -> None:
         click.secho("No projects indexed yet. Run 'codespine analyse <path>'.", fg="yellow")
         return
 
+    def _stat_count(store, query: str, params: dict) -> int:
+        """Run a stats count query — returns 0 on any failure."""
+        try:
+            rows = store.query_records(query, params)
+            return int(rows[0]["n"]) if rows else 0
+        except Exception as exc:  # noqa: BLE001
+            click.secho(f" (stat unavailable: {exc})", fg="yellow")
+            return 0
+
     rows = []
     for p in all_projects_meta:
         pid = p["id"]
         # Route each query to the project's owning shard.
         ps = _project_store(pid)
-
-
+        n_files = _stat_count(
+            ps,
+            "MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n",
+            {"pid": pid},
         )
-
+        n_classes = _stat_count(
+            ps,
             """
             MATCH (f:File) WHERE f.project_id = $pid
             WITH f
@@ -758,7 +819,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
             """,
             {"pid": pid},
         )
-
+        n_methods = _stat_count(
+            ps,
             """
             MATCH (f:File) WHERE f.project_id = $pid
             WITH f
@@ -769,7 +831,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
             """,
             {"pid": pid},
         )
-
+        n_calls = _stat_count(
+            ps,
             """
             MATCH (f:File) WHERE f.project_id = $pid
             WITH f
@@ -780,7 +843,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
             """,
             {"pid": pid},
         )
-
+        n_emb = _stat_count(
+            ps,
             """
             MATCH (f:File) WHERE f.project_id = $pid
             WITH f
@@ -793,11 +857,11 @@ def stats(as_json: bool, show_shards: bool) -> None:
             "project": pid,
             "path": p["path"],
             "shard": sg.router.shard_for(pid),
-            "files":
-            "classes":
-            "methods":
-            "calls_out":
-            "embeddings":
+            "files": n_files,
+            "classes": n_classes,
+            "methods": n_methods,
+            "calls_out": n_calls,
+            "embeddings": n_emb,
         })
 
     if as_json:
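The effect of the `_stat_count` wrapper is that one failing shard degrades a single statistic to 0 instead of aborting the whole `codespine stats` run. An illustrative sketch of that contract against stub stores (hypothetical classes, not package code):

    class OkStore:
        def query_records(self, query, params):
            return [{"n": 42}]

    class BrokenStore:
        def query_records(self, query, params):
            raise RuntimeError("shard schema is mid-migration")

    # With the helper defined in the diff above:
    #   _stat_count(OkStore(), "MATCH ... RETURN count(f) as n", {})      -> 42
    #   _stat_count(BrokenStore(), "MATCH ... RETURN count(f) as n", {})  -> warns, returns 0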
codespine-1.0.6/codespine/db/_cypher_compat.py

@@ -0,0 +1,523 @@
+"""Cypher-to-SQL translation for CodeSpine's DuckDB backend.
+
+Translates the specific subset of OpenCypher used by CodeSpine into
+equivalent DuckDB SQL so that every ``store.query_records(cypher, params)``
+call continues to work without touching the call-sites.
+
+Supported constructs
+--------------------
+- Node patterns           MATCH (alias:Label) or (a:L {prop: $v})
+- Anonymous nodes         (:Label) in NOT-EXISTS subqueries
+- Relationship patterns   (a)-[r:REL]->(b) directed
+- Undirected edges        (a)-[r:REL]-(b) → OR of both directions
+- Virtual FK edges        (a)-[:HAS_METHOD]->(b) → b.class_id = a.id (no edge table)
+- Multi-hop patterns      (a)-[:R1]->(x)-[:R2]->(b)
+- Anonymous destination   (a)-[:CALLS]->()
+- Multi-MATCH + WITH      Multiple MATCH clauses joined by WITH pipeline stages
+- WHERE                   =, <>, IN, CONTAINS, lower(), coalesce(),
+                          IS NULL, IS NOT NULL, >=, <=
+- NOT EXISTS subqueries   NOT EXISTS { MATCH (:N)-[:R]->(m) }
+- WITH … ORDER BY         Kuzu paging construct → plain ORDER BY
+- DISTINCT, ORDER BY, LIMIT
+- Aggregates              count(n) → count(*)
+- Literal values          'string' in RETURN (e.g. 'method' as kind)
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Any
+
+# ---------------------------------------------------------------------------
+# Schema mappings
+# ---------------------------------------------------------------------------
+
+# Kuzu node label → DuckDB table name
+_LABEL_TABLE: dict[str, str] = {
+    "Project": "projects",
+    "File": "files",
+    "Class": "classes",
+    "Method": "methods",
+    "Symbol": "symbols",
+    "Community": "communities",
+    "Flow": "flows",
+}
+
+# Kuzu relationship type → (edge_table, src_col, dst_col, extra_where | None)
+_REL_EDGE: dict[str, tuple[str, str, str, str | None]] = {
+    "CALLS": ("calls", "source_id", "target_id", None),
+    "OVERRIDES": ("references_type", "src_id", "dst_id", "rel = 'OVERRIDES'"),
+    "IMPLEMENTS": ("references_type", "src_id", "dst_id", "rel = 'IMPLEMENTS'"),
+    "INJECTS": ("injects", "src_class_id", "dst_class_id", None),
+    "BINDS_INTERFACE": ("binds_interface", "src_class_id", "dst_class_id", None),
+    "IN_COMMUNITY": ("community_members", "symbol_id", "community_id", None),
+    "IN_FLOW": ("flow_members", "symbol_id", "flow_id", None),
+    "CO_CHANGED_WITH": ("co_changed_with", "file_a", "file_b", None),
+}
+
+# Virtual FK edges: backed by a foreign-key column rather than a separate
+# edge table. Format: (src_label_table, dst_label_table, dst_fk_col)
+# e.g. HAS_METHOD: methods.class_id = class.id
+_VIRTUAL_REL_EDGE: dict[str, tuple[str, str, str]] = {
+    "HAS_METHOD": ("classes", "methods", "class_id"),
+    "HAS_CLASS": ("files", "classes", "file_id"),
+    "DECLARES": ("files", "symbols", "file_id"),
+}
+
+# All real edge tables (used for anonymous total-count query)
+_ALL_EDGE_TABLES = (
+    "calls",
+    "references_type",
+    "injects",
+    "binds_interface",
+    "community_members",
+    "flow_members",
+    "co_changed_with",
+)
+
+# Top-level Cypher keywords recognised by the clause splitter.
+# Order matters: longer / more-specific patterns must come before shorter ones.
+_TOP_KEYWORDS = (
+    "OPTIONAL MATCH",
+    "ORDER BY",
+    "MATCH",
+    "WITH",
+    "WHERE",
+    "RETURN",
+    "LIMIT",
+)
+
+
+def is_cypher(query: str) -> bool:
+    """Return True if *query* looks like Cypher rather than SQL."""
+    return bool(re.match(r"(?i)\s*MATCH\b", query.lstrip()))
+
+
+# ---------------------------------------------------------------------------
+# Main entry point
+# ---------------------------------------------------------------------------
+
+def translate(cypher: str, params: dict[str, Any] | None = None) -> tuple[str, dict[str, Any]]:
+    """Translate *cypher* to DuckDB SQL.
+
+    Returns ``(sql, params_dict)`` where *params_dict* preserves the original
+    ``$name`` bindings so they can be passed directly to
+    ``duckdb.connect().execute(sql, params_dict)``.
+    """
+    sql = _translate(cypher)
+    return sql, (params or {})
+
+
+# ---------------------------------------------------------------------------
+# Clause splitter
+# ---------------------------------------------------------------------------
+
+def _split_clauses(q: str) -> list[tuple[str, str]]:
+    """Tokenise a (whitespace-normalised) Cypher query into clause pairs.
+
+    Returns a list of ``(keyword, body)`` tuples at the TOP level of the
+    query. Keywords inside ``()``, ``[]``, ``{}`` or quoted strings are
+    NOT treated as clause boundaries.
+
+    Example::
+
+        "MATCH (f:File) WHERE f.id = $x WITH f MATCH (c:Class) RETURN c.name"
+        → [("MATCH", "(f:File)"),
+           ("WHERE", "f.id = $x"),
+           ("WITH", "f"),
+           ("MATCH", "(c:Class)"),
+           ("RETURN","c.name")]
+    """
+    results: list[tuple[str, str]] = []
+    n = len(q)
+    i = 0
+    depth_paren = depth_sq = depth_brace = 0
+    in_quote = False
+    quote_char = ""
+    current_kw: str | None = None
+    current_start = 0
+
+    while i < n:
+        ch = q[i]
+
+        # ── Quote handling ────────────────────────────────────────────────
+        if in_quote:
+            if ch == "\\" and i + 1 < n:
+                i += 2  # skip escaped char
+                continue
+            if ch == quote_char:
+                in_quote = False
+            i += 1
+            continue
+        if ch in ('"', "'"):
+            in_quote = True
+            quote_char = ch
+            i += 1
+            continue
+
+        # ── Depth tracking ────────────────────────────────────────────────
+        if ch == "(":
+            depth_paren += 1
+        elif ch == ")":
+            depth_paren = max(0, depth_paren - 1)
+        elif ch == "[":
+            depth_sq += 1
+        elif ch == "]":
+            depth_sq = max(0, depth_sq - 1)
+        elif ch == "{":
+            depth_brace += 1
+        elif ch == "}":
+            depth_brace = max(0, depth_brace - 1)
+
+        # ── Keyword detection (top-level only) ───────────────────────────
+        if depth_paren == 0 and depth_sq == 0 and depth_brace == 0:
+            for kw in _TOP_KEYWORDS:
+                kl = len(kw)
+                if q[i : i + kl].upper() == kw:
+                    end_pos = i + kl
+                    # Require word boundary: end-of-string or non-word char
+                    if end_pos < n and (q[end_pos].isalnum() or q[end_pos] == "_"):
+                        continue  # e.g. "MATCHING" is not "MATCH"
+                    # Flush previous clause
+                    if current_kw is not None:
+                        body = q[current_start:i].strip()
+                        results.append((current_kw, body))
+                    current_kw = kw
+                    current_start = end_pos
+                    i = end_pos
+                    break
+            else:
+                i += 1
+        else:
+            i += 1
+
+    # Flush final clause
+    if current_kw is not None:
+        body = q[current_start:].strip()
+        results.append((current_kw, body))
+
+    return results
+
+
+# ---------------------------------------------------------------------------
+# Internal translation pipeline
+# ---------------------------------------------------------------------------
+
+def _translate_anonymous_edge_count(cypher: str) -> str | None:
+    """Fast-path for ``MATCH ()-[r]->() RETURN count(r) [as X]``.
+
+    Anonymous edge patterns carry no labels so the generic translator
+    cannot derive a FROM table. We special-case the count-all-edges
+    pattern by summing row-counts across every edge table.
+    """
+    q = re.sub(r"\s+", " ", cypher.strip())
+    m = re.match(
+        r"(?i)MATCH\s*\(\s*\)\s*-\s*\[\s*\w*\s*\]\s*->\s*\(\s*\)\s*"
+        r"RETURN\s+count\s*\(\s*[*]?\w*\s*\)\s*(?:as\s+(\w+))?\s*$",
+        q,
+    )
+    if not m:
+        return None
+    alias = m.group(1) or "count"
+    unions = " UNION ALL ".join(
+        f"SELECT COUNT(*) AS c FROM {tbl}" for tbl in _ALL_EDGE_TABLES
+    )
+    return f"SELECT COALESCE(SUM(c), 0) AS {alias} FROM ({unions}) t"
+
+
+def _translate(cypher: str) -> str:
+    # ── Fast-path: anonymous total-edge-count ────────────────────────────
+    special = _translate_anonymous_edge_count(cypher)
+    if special is not None:
+        return special
+
+    q = re.sub(r"\s+", " ", cypher.strip())
+    clauses = _split_clauses(q)
+
+    aliases: dict[str, str] = {}    # alias → table name
+    inline_conds: list[str] = []    # WHERE from inline {prop: $val}
+    edge_conds: list[str] = []      # join conditions from real edge tables
+    virtual_conds: list[str] = []   # FK conditions from virtual edges
+    where_parts: list[str] = []     # collected WHERE bodies
+    ret_cols = "*"
+    ret_distinct = ""
+    order_clause = ""
+    limit_clause = ""
+    _rel_counter = {"n": 0}
+
+    for kw, body in clauses:
+        if kw == "MATCH":
+            _absorb_match(body, aliases, inline_conds, edge_conds,
+                          virtual_conds, _rel_counter)
+        elif kw == "OPTIONAL MATCH":
+            # Degenerate: register new node aliases so their columns are
+            # selectable, but don't add INNER JOIN constraints.
+            # Full LEFT JOIN support is a future enhancement.
+            _absorb_match_nodes_only(body, aliases)
+        elif kw == "WITH":
+            # Paging idiom: WITH x ORDER BY x.col LIMIT n
+            ob = re.search(r"(?i)ORDER\s+BY\s+(.+?)(?:\s+LIMIT\s+\S+)?\s*$", body)
+            if ob and not order_clause:
+                order_clause = "ORDER BY " + ob.group(1).strip()
+            lm = re.search(r"(?i)LIMIT\s+(\S+)", body)
+            if lm and not limit_clause:
+                limit_clause = "LIMIT " + lm.group(1)
+            # Pipeline-separator WITH (no ORDER BY) is simply dropped.
+        elif kw == "WHERE":
+            where_parts.append(body)
+        elif kw == "RETURN":
+            # DISTINCT
+            dm = re.match(r"(?i)DISTINCT\s+(.*)", body)
+            if dm:
+                ret_distinct = "DISTINCT "
+                body = dm.group(1)
+            # Trailing ORDER BY inside RETURN
+            ob = re.search(r"(?i)\s+ORDER\s+BY\s+(.+?)(?=\s*(?:LIMIT|$))", body)
+            if ob and not order_clause:
+                order_clause = "ORDER BY " + ob.group(1).strip()
+                body = body[: ob.start()].strip()
+            # Trailing LIMIT inside RETURN
+            lm = re.search(r"(?i)\s+LIMIT\s+(\S+)", body)
+            if lm and not limit_clause:
+                limit_clause = "LIMIT " + lm.group(1)
+                body = body[: lm.start()].strip()
+            ret_cols = body.strip()
+        elif kw == "ORDER BY":
+            if not order_clause:
+                order_clause = "ORDER BY " + body
+        elif kw == "LIMIT":
+            if not limit_clause:
+                limit_clause = "LIMIT " + body.split()[0]
+
+    # ── FROM clause ───────────────────────────────────────────────────────
+    seen: set[str] = set()
+    from_parts: list[str] = []
+    for alias, tbl in aliases.items():
+        entry = f"{tbl} {alias}"
+        if entry not in seen:
+            from_parts.append(entry)
+            seen.add(entry)
+    from_str = ", ".join(from_parts) if from_parts else "(SELECT 1 WHERE 1=0) _empty(x)"
+
+    # ── WHERE clause ──────────────────────────────────────────────────────
+    all_conds: list[str] = []
+    all_conds.extend(edge_conds)
+    all_conds.extend(virtual_conds)
+    all_conds.extend(inline_conds)
+    for wp in where_parts:
+        expanded = _expand_not_exists(wp, aliases)
+        transformed = _transform_where(expanded)
+        if transformed:
+            all_conds.append(transformed)
+    where_str = " AND ".join(c for c in all_conds if c)
+
+    # ── SELECT ────────────────────────────────────────────────────────────
+    select_str = _transform_select(ret_cols)
+
+    # ── Assemble ──────────────────────────────────────────────────────────
+    parts = [f"SELECT {ret_distinct}{select_str}", f"FROM {from_str}"]
+    if where_str:
+        parts.append(f"WHERE {where_str}")
+    if order_clause:
+        parts.append(order_clause)
+    if limit_clause:
+        parts.append(limit_clause)
+    return " ".join(parts)
+
+
+# ---------------------------------------------------------------------------
+# MATCH body processing
+# ---------------------------------------------------------------------------
+
+# Rel pattern: (src)-[alias:TYPE]->(dst) or (src)-[alias:TYPE]-(dst)
+# dst alias is optional (anonymous destination)
+_REL_DIRECTED_RE = re.compile(
+    r"\((\w+)(?::\w+(?:\s*\{[^}]*\})?)?\)"   # src node
+    r"\s*-\[(\w*):(\w+)\]->\s*"              # -[alias:TYPE]->
+    r"\((\w*)(?::\w+(?:\s*\{[^}]*\})?)?\)"   # dst node (alias optional)
+)
+_REL_UNDIRECTED_RE = re.compile(
+    r"\((\w+)(?::\w+(?:\s*\{[^}]*\})?)?\)"   # src node
+    r"\s*-\[(\w*):(\w+)\]-\s*"               # -[alias:TYPE]-
+    r"\((\w*)(?::\w+(?:\s*\{[^}]*\})?)?\)"   # dst node
+)
+_NODE_RE = re.compile(r"\((\w+):(\w+)(?:\s*\{([^}]+)\})?\)")
+
+
+def _absorb_match(
+    body: str,
+    aliases: dict[str, str],
+    inline_conds: list[str],
+    edge_conds: list[str],
+    virtual_conds: list[str],
+    _rel_counter: dict[str, int],
+) -> None:
+    """Extract node aliases and relationship patterns from one MATCH body."""
+
+    # 1. Register named node patterns (alias:Label [{prop: $val}])
+    for m in _NODE_RE.finditer(body):
+        alias, label, inline = m.group(1), m.group(2), m.group(3)
+        if label in _LABEL_TABLE and alias not in aliases:
+            aliases[alias] = _LABEL_TABLE[label]
+        if inline:
+            for kv in re.finditer(r"(\w+)\s*:\s*(\$\w+)", inline):
+                inline_conds.append(f"{alias}.{kv.group(1)} = {kv.group(2)}")
+
+    # 2. Process directed rel patterns iteratively (handles multi-hop chains).
+    def _do_directed(m_obj: re.Match) -> str:
+        src, ralias, rtype, dst = (
+            m_obj.group(1), m_obj.group(2), m_obj.group(3), m_obj.group(4)
+        )
+        _process_rel(src, ralias, rtype, dst,
+                     aliases, edge_conds, virtual_conds, _rel_counter,
+                     undirected=False)
+        # Return just the dst node so multi-hop chains resolve left→right.
+        return f"({dst})" if dst else "()"
+
+    q = body
+    prev = None
+    while prev != q:
+        prev = q
+        q = _REL_DIRECTED_RE.sub(_do_directed, q)
+
+    # 3. Process undirected rel patterns.
+    for m in _REL_UNDIRECTED_RE.finditer(body):
+        src, ralias, rtype, dst = (
+            m.group(1), m.group(2), m.group(3), m.group(4)
+        )
+        _process_rel(src, ralias, rtype, dst,
+                     aliases, edge_conds, virtual_conds, _rel_counter,
+                     undirected=True)
+
+
+def _absorb_match_nodes_only(body: str, aliases: dict[str, str]) -> None:
+    """Register node aliases from an OPTIONAL MATCH without adding joins.
+
+    This ensures columns from OPTIONAL MATCH nodes are reachable in SELECT/
+    WHERE even though we don't yet emit a proper LEFT JOIN.
+    """
+    for m in _NODE_RE.finditer(body):
+        alias, label = m.group(1), m.group(2)
+        if label in _LABEL_TABLE and alias not in aliases:
+            aliases[alias] = _LABEL_TABLE[label]
+
+
+def _process_rel(
+    src_alias: str,
+    rel_alias: str,
+    rel_type: str,
+    dst_alias: str,
+    aliases: dict[str, str],
+    edge_conds: list[str],
+    virtual_conds: list[str],
+    _rel_counter: dict[str, int],
+    *,
+    undirected: bool,
+) -> None:
+    """Emit join conditions for one relationship hop."""
+
+    # ── Virtual FK edge (no edge table) ──────────────────────────────────
+    if rel_type in _VIRTUAL_REL_EDGE:
+        _, dst_tbl, dst_fk_col = _VIRTUAL_REL_EDGE[rel_type]
+        # Register dst alias if it carries a label (already done in
+        # _absorb_match's node scan, but also handle un-labelled refs).
+        if dst_alias and dst_alias not in aliases:
+            aliases[dst_alias] = dst_tbl
+        if dst_alias:
+            virtual_conds.append(f"{dst_alias}.{dst_fk_col} = {src_alias}.id")
+        return
+
+    # ── Real edge table ───────────────────────────────────────────────────
+    if rel_type not in _REL_EDGE:
+        return  # unknown relationship type — skip silently
+
+    edge_tbl, src_col, dst_col, extra = _REL_EDGE[rel_type]
+    _rel_counter["n"] += 1
+    ra = rel_alias or f"_r{_rel_counter['n']}"
+
+    if ra not in aliases:
+        aliases[ra] = edge_tbl
+    edge_conds.append(f"{ra}.{src_col} = {src_alias}.id")
+    if dst_alias:
+        if undirected:
+            # Undirected: match either direction.
+            edge_conds.append(
+                f"({ra}.{dst_col} = {dst_alias}.id"
+                f" OR {ra}.{src_col} = {dst_alias}.id)"
+            )
+        else:
+            edge_conds.append(f"{ra}.{dst_col} = {dst_alias}.id")
+    if extra:
+        edge_conds.append(f"{ra}.{extra}")
+
+
+# ---------------------------------------------------------------------------
+# WHERE expansion
+# ---------------------------------------------------------------------------
+
+def _expand_not_exists(body: str, aliases: dict[str, str]) -> str:
+    """Replace ``NOT EXISTS { MATCH ... }`` with a SQL subquery."""
+
+    def _replace(m_obj: re.Match) -> str:
+        inner = m_obj.group(1)
+        rel_m = re.search(
+            r"\((\w*):?(\w*)\)-\[:(\w+)\]->\((\w*):?(\w*)\)", inner
+        )
+        if not rel_m:
+            return m_obj.group(0)
+        src_alias = rel_m.group(1)
+        rel_type = rel_m.group(3)
+        dst_alias = rel_m.group(4)
+        if rel_type not in _REL_EDGE:
+            return m_obj.group(0)
+        edge_tbl, src_col, dst_col, _ = _REL_EDGE[rel_type]
+        if dst_alias and dst_alias in aliases:
+            return (
+                f"NOT EXISTS (SELECT 1 FROM {edge_tbl} _ne"
+                f" WHERE _ne.{dst_col} = {dst_alias}.id)"
+            )
+        if src_alias and src_alias in aliases:
+            return (
+                f"NOT EXISTS (SELECT 1 FROM {edge_tbl} _ne"
+                f" WHERE _ne.{src_col} = {src_alias}.id)"
+            )
+        return m_obj.group(0)
+
+    return re.sub(
+        r"NOT\s+EXISTS\s*\{\s*MATCH\s*([^}]+)\}",
+        _replace,
+        body,
+        flags=re.IGNORECASE,
+    )
+
+
+# ---------------------------------------------------------------------------
+# Clause-level transformations
+# ---------------------------------------------------------------------------
+
+def _transform_where(where: str) -> str:
+    if not where:
+        return ""
+    # CONTAINS → LIKE
+    where = re.sub(
+        r"(\w+\.\w+|\blower\([^)]+\)|\bcoalesce\([^)]+\))\s+CONTAINS\s+(\$\w+|'[^']*')",
+        lambda m: f"{m.group(1)} LIKE '%' || {m.group(2)} || '%'",
+        where,
+        flags=re.IGNORECASE,
+    )
+    # IN $list → = ANY($list)
+    where = re.sub(
+        r"\bIN\s+(\$\w+)\b",
+        r"= ANY(\1)",
+        where,
+        flags=re.IGNORECASE,
+    )
+    return where
+
+
+def _transform_select(ret: str) -> str:
+    if not ret:
+        return "*"
+    # count(alias) → count(*)
+    ret = re.sub(r"\bcount\s*\(\s*\w+\s*\)", "count(*)", ret, flags=re.IGNORECASE)
+    return ret