ltcai 3.4.1 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. package/README.md +206 -247
  2. package/docs/CARRYOVER_AUDIT_v3.6.0.md +61 -0
  3. package/docs/CHANGELOG.md +32 -0
  4. package/docs/HANDOVER_v3.6.0.md +46 -0
  5. package/docs/RUNTIME_HOOK_COVERAGE_v3.5.0.md +56 -0
  6. package/docs/RUNTIME_HOOK_COVERAGE_v3.6.0.md +49 -0
  7. package/docs/architecture.md +13 -12
  8. package/docs/kg-schema.md +55 -0
  9. package/docs/privacy.md +18 -2
  10. package/docs/security-model.md +17 -0
  11. package/kg_schema.py +46 -0
  12. package/knowledge_graph.py +520 -1
  13. package/latticeai/__init__.py +1 -1
  14. package/latticeai/api/auth.py +37 -9
  15. package/latticeai/api/browser.py +217 -0
  16. package/latticeai/api/chat.py +4 -1
  17. package/latticeai/api/computer_use.py +21 -8
  18. package/latticeai/api/portability.py +93 -0
  19. package/latticeai/api/tools.py +29 -26
  20. package/latticeai/core/config.py +3 -0
  21. package/latticeai/core/marketplace.py +1 -1
  22. package/latticeai/core/multi_agent.py +1 -1
  23. package/latticeai/core/oidc.py +205 -0
  24. package/latticeai/core/security.py +59 -5
  25. package/latticeai/core/workspace_os.py +1 -1
  26. package/latticeai/server_app.py +39 -0
  27. package/latticeai/services/ingestion.py +271 -0
  28. package/latticeai/services/kg_portability.py +177 -0
  29. package/package.json +5 -4
  30. package/requirements.txt +1 -0
  31. package/scripts/build_vsix.mjs +72 -0
  32. package/scripts/check_python.py +87 -0
  33. package/static/css/reference/account.css +1 -1
  34. package/static/css/reference/admin.css +1 -1
  35. package/static/css/reference/base.css +8 -5
  36. package/static/css/reference/chat.css +8 -8
  37. package/static/css/reference/graph.css +2 -2
  38. package/static/css/responsive.css +2 -2
  39. package/static/v3/asset-manifest.json +9 -9
  40. package/static/v3/css/{lattice.shell.6ceea7c8.css → lattice.shell.8fcc9d33.css} +2 -1
  41. package/static/v3/css/lattice.shell.css +2 -1
  42. package/static/v3/js/{app.d086489d.js → app.c541f955.js} +1 -1
  43. package/static/v3/js/core/{api.12b568ad.js → api.33d6320e.js} +38 -0
  44. package/static/v3/js/core/api.js +38 -0
  45. package/static/v3/js/core/{routes.d214b399.js → routes.2ce3815a.js} +1 -1
  46. package/static/v3/js/core/routes.js +1 -1
  47. package/static/v3/js/core/{shell.d05266f5.js → shell.8c163e0e.js} +2 -2
  48. package/static/v3/js/views/knowledge-graph.a96040a5.js +513 -0
  49. package/static/v3/js/views/knowledge-graph.js +293 -17
  50. package/static/workspace.css +1 -1
  51. package/tools/__init__.py +276 -0
  52. package/tools/commands.py +188 -0
  53. package/tools/computer.py +185 -0
  54. package/tools/documents.py +243 -0
  55. package/tools/filesystem.py +560 -0
  56. package/tools/knowledge.py +97 -0
  57. package/tools/local_files.py +69 -0
  58. package/tools/network.py +66 -0
  59. package/static/v3/js/views/knowledge-graph.a14ea7e7.js +0 -237
  60. package/tools.py +0 -1525
@@ -942,6 +942,31 @@ class KnowledgeGraphStore:
942
942
  error_message TEXT,
943
943
  metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json))
944
944
  );
945
+ -- v3.6.0 Knowledge Graph First: per-ingestion provenance trail.
946
+ -- Append-only audit of where every graph node came from, when it
947
+ -- was captured, how it was processed, and whether it was embedded /
948
+ -- linked / used by an agent. get_provenance() returns the latest row.
949
+ CREATE TABLE IF NOT EXISTS ingestion_provenance (
950
+ id TEXT PRIMARY KEY,
951
+ node_id TEXT NOT NULL,
952
+ source_type TEXT NOT NULL,
953
+ source_uri TEXT,
954
+ content_hash TEXT,
955
+ title TEXT,
956
+ pipeline TEXT NOT NULL,
957
+ owner TEXT,
958
+ workspace_id TEXT,
959
+ captured_at TEXT,
960
+ modified_at TEXT,
961
+ embedded INTEGER NOT NULL DEFAULT 0,
962
+ linked INTEGER NOT NULL DEFAULT 0,
963
+ duplicate INTEGER NOT NULL DEFAULT 0,
964
+ agent_used TEXT,
965
+ chunk_count INTEGER NOT NULL DEFAULT 0,
966
+ permissions_json TEXT NOT NULL DEFAULT '{}' CHECK (json_valid(permissions_json)),
967
+ metadata_json TEXT NOT NULL DEFAULT '{}' CHECK (json_valid(metadata_json)),
968
+ created_at TEXT NOT NULL
969
+ );
945
970
  CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(type);
946
971
  CREATE INDEX IF NOT EXISTS idx_edges_from ON edges(from_node);
947
972
  CREATE INDEX IF NOT EXISTS idx_edges_to ON edges(to_node);
@@ -954,6 +979,10 @@ class KnowledgeGraphStore:
954
979
  CREATE INDEX IF NOT EXISTS idx_vector_embeddings_source ON vector_embeddings(source_node);
955
980
  CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(embedding_model);
956
981
  CREATE INDEX IF NOT EXISTS idx_vector_index_operations_requested ON vector_index_operations(requested_at);
982
+ CREATE INDEX IF NOT EXISTS idx_provenance_node ON ingestion_provenance(node_id);
983
+ CREATE INDEX IF NOT EXISTS idx_provenance_source_type ON ingestion_provenance(source_type);
984
+ CREATE INDEX IF NOT EXISTS idx_provenance_hash ON ingestion_provenance(content_hash);
985
+ CREATE INDEX IF NOT EXISTS idx_provenance_created ON ingestion_provenance(created_at);
957
986
  """
958
987
  )
959
988
  conn.execute(
@@ -2703,12 +2732,20 @@ class KnowledgeGraphStore:
2703
2732
  uploader: Optional[str] = None,
2704
2733
  conversation_id: Optional[str] = None,
2705
2734
  extracted: Optional[Dict[str, Any]] = None,
2735
+ source_type: Optional[str] = None,
2736
+ source_uri: Optional[str] = None,
2737
+ captured_at: Optional[str] = None,
2738
+ modified_at: Optional[str] = None,
2739
+ owner: Optional[str] = None,
2740
+ workspace_id: Optional[str] = None,
2741
+ permissions: Optional[Dict[str, Any]] = None,
2706
2742
  ) -> Dict[str, Any]:
2707
2743
  path = Path(path)
2708
2744
  data = path.read_bytes()
2709
2745
  digest = _sha256_bytes(data)
2710
2746
  ext = path.suffix.lower()
2711
2747
  filename = original_filename or path.name
2748
+ captured_at = captured_at or _now()
2712
2749
  blob_path = self.blob_dir / digest[:2] / f"{digest}{ext}"
2713
2750
  blob_path.parent.mkdir(parents=True, exist_ok=True)
2714
2751
  if not blob_path.exists():
@@ -2723,8 +2760,16 @@ class KnowledgeGraphStore:
2723
2760
  "mime_type": mime_type,
2724
2761
  "bytes": len(data),
2725
2762
  "sha256": digest,
2763
+ "content_hash": digest,
2726
2764
  "blob_path": str(blob_path),
2727
2765
  "uploader": uploader,
2766
+ "owner": owner or uploader,
2767
+ "workspace_id": workspace_id,
2768
+ "permissions": permissions or {},
2769
+ "source_type": source_type or "file",
2770
+ "source_uri": source_uri or str(path),
2771
+ "captured_at": captured_at,
2772
+ "modified_at": modified_at,
2728
2773
  "conversation_id": conversation_id,
2729
2774
  "extracted": {k: v for k, v in (extracted or {}).items() if k != "content"},
2730
2775
  "structure": doc_meta,
@@ -2732,8 +2777,11 @@ class KnowledgeGraphStore:
2732
2777
  full_text = f"{filename}\n{text}"
2733
2778
  concepts = _extract_concepts(full_text, limit=15)
2734
2779
  triples = _extract_triples(full_text, concepts)
2780
+ chunk_ids: List[str] = []
2781
+ source_node_id: Optional[str] = None
2735
2782
 
2736
2783
  with self._connect() as conn:
2784
+ duplicate = self._node_exists(conn, file_id)
2737
2785
  # ── Document 노드 (점: 명사 — 파일) ────────────────────────────────
2738
2786
  self._upsert_node(
2739
2787
  conn, file_id, "Document", filename,
@@ -2742,6 +2790,15 @@ class KnowledgeGraphStore:
2742
2790
  )
2743
2791
  self._ingest_structure_nodes(conn, file_id, filename, doc_meta)
2744
2792
 
2793
+ # ── SOURCE 노드 + indexed_from (v3.6.0, source_type 지정 시) ──────
2794
+ if source_type:
2795
+ source_node_id = self._attach_source_node(
2796
+ conn, file_id,
2797
+ source_type=source_type, source_uri=source_uri or str(path),
2798
+ title=filename, content_hash=digest, captured_at=captured_at,
2799
+ extra={"owner": owner or uploader, "workspace_id": workspace_id, "ext": ext},
2800
+ )
2801
+
2745
2802
  # ── Person 노드 + 동사형 엣지 ─────────────────────────────────────
2746
2803
  if uploader:
2747
2804
  person_id = f"person:{_slug(uploader)}"
@@ -2762,6 +2819,7 @@ class KnowledgeGraphStore:
2762
2819
  # ── RAG chunks (검색용, 그래프 비표시) ────────────────────────────
2763
2820
  for index, chunk in enumerate(_chunks(text)):
2764
2821
  chunk_id = f"chunk:{_sha256_text(f'{file_id}:{index}:{chunk}')[:24]}"
2822
+ chunk_ids.append(chunk_id)
2765
2823
  self._upsert_node(
2766
2824
  conn, chunk_id, "Chunk",
2767
2825
  f"{filename} chunk {index + 1}",
@@ -2816,7 +2874,18 @@ class KnowledgeGraphStore:
2816
2874
  # 선: Document가 Task/Decision을 "포함함"
2817
2875
  self._upsert_edge(conn, file_id, sem_id, "포함함", weight=0.9)
2818
2876
 
2819
- return {"node_id": file_id, "sha256": digest, "metadata": metadata}
2877
+ return {
2878
+ "node_id": file_id,
2879
+ "type": "Document",
2880
+ "sha256": digest,
2881
+ "content_hash": digest,
2882
+ "source_node_id": source_node_id,
2883
+ "chunk_ids": chunk_ids,
2884
+ "chunk_count": len(chunk_ids),
2885
+ "duplicate": duplicate,
2886
+ "captured_at": captured_at,
2887
+ "metadata": metadata,
2888
+ }
2820
2889
 
2821
2890
  def ingest_event(
2822
2891
  self,
@@ -2854,6 +2923,449 @@ class KnowledgeGraphStore:
2854
2923
  self._upsert_edge(conn, person_id, event_id, "triggered", metadata={"event_type": event_type})
2855
2924
  return {"node_id": event_id, "type": event_type}
2856
2925
 
2926
+ # ── v3.6.0 Knowledge Graph First: unified source ingestion + provenance ──────
2927
+ def _node_exists(self, conn: sqlite3.Connection, node_id: str) -> bool:
2928
+ row = conn.execute("SELECT 1 FROM nodes WHERE id = ?", (node_id,)).fetchone()
2929
+ return row is not None
2930
+
2931
def node_is_embedded(self, node_id: str) -> bool:
    """Tell whether ``vector_embeddings`` holds at least one row for ``node_id``.

    The lookup matches on the ``item_id`` column and stops at the first hit.
    """
    query = "SELECT 1 FROM vector_embeddings WHERE item_id = ? LIMIT 1"
    with self._connect() as conn:
        hit = conn.execute(query, (node_id,)).fetchone()
    return hit is not None
2939
+
2940
def _attach_source_node(
    self,
    conn: sqlite3.Connection,
    content_node_id: str,
    *,
    source_type: str,
    source_uri: Optional[str] = None,
    title: Optional[str] = None,
    content_hash: Optional[str] = None,
    captured_at: Optional[str] = None,
    extra: Optional[Dict[str, Any]] = None,
) -> str:
    """Upsert the ``Source`` node for an ingested item and link the content to it.

    The source id is ``"source:"`` plus the first 24 characters of the hash of
    ``"<source_type>|<key>"``, where the key is the first non-empty value among
    ``source_uri``, ``content_hash`` and ``content_node_id``. Ingesting the
    same origin again therefore targets the same source id.

    Args:
        conn: Open connection the node and edge are written through.
        content_node_id: Id of the content node that came from this source.
        source_type: Kind of origin; also stored on the edge metadata.
        source_uri: Location of the origin, when there is one.
        title: Preferred label for the source node.
        content_hash: Hash of the ingested content; fallback identity key.
        captured_at: Capture timestamp; ``_now()`` is used when omitted.
        extra: Additional metadata merged over the base metadata keys.

    Returns:
        The id of the source node.
    """
    identity = source_uri or content_hash or content_node_id
    digest = _sha256_text(f"{source_type}|{identity}")
    source_id = f"source:{digest[:24]}"

    source_meta: Dict[str, Any] = {
        "source_type": source_type,
        "source_uri": source_uri,
        "content_hash": content_hash,
        "captured_at": captured_at or _now(),
    }
    if extra:
        # Caller-supplied keys win over the base keys above.
        source_meta.update(extra)

    self._upsert_node(
        conn,
        source_id,
        "Source",
        title or source_uri or source_type,
        summary=str(source_uri or title or source_type)[:400],
        metadata=source_meta,
    )
    # Edge: the content node was "indexed from" this source.
    self._upsert_edge(
        conn,
        content_node_id,
        source_id,
        "indexed_from",
        weight=1.0,
        metadata={"source_type": source_type},
    )
    return source_id
2978
+
2979
def ingest_source(
    self,
    *,
    source_type: str,
    title: str,
    text: str,
    source_uri: Optional[str] = None,
    owner: Optional[str] = None,
    workspace_id: Optional[str] = None,
    permissions: Optional[Dict[str, Any]] = None,
    captured_at: Optional[str] = None,
    modified_at: Optional[str] = None,
    conversation_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Unified text/web ingestion: one shape for URL, browser tab, note, text.

    Creates a content ``Document`` node, a ``Source`` node linked via
    ``indexed_from``, RAG chunks, and extracted concept and Task/Decision
    nodes for a non-file source.

    The content id is ``"webdoc:"`` plus the first 24 characters of a hash
    computed over ``source_type``, ``source_uri`` and ``text`` together, so
    the same text captured from a different URI or source type yields a
    different node.

    Args:
        source_type: Kind of origin; falls back to ``"text"`` when empty.
        title: Display title; falls back to ``source_uri`` then
            ``source_type`` and is truncated to 240 characters after cleaning.
        text: Body text to index; ``None`` is treated as an empty string.
        source_uri: Location of the origin, when there is one.
        owner: Owner identifier; when set, a ``Person`` node is linked.
        workspace_id: Workspace the content belongs to (stored as metadata).
        permissions: Permission mapping stored as metadata (default ``{}``).
        captured_at: Capture timestamp; ``_now()`` is used when omitted.
        modified_at: Modification timestamp stored as metadata.
        conversation_id: When set, a ``Chat`` node is linked to the content.
        metadata: Extra metadata merged last into the node metadata, so its
            keys override the built-in ones.

    Returns:
        A dict with ``node_id``, ``type``, ``source_node_id``,
        ``content_hash``, ``chunk_ids``, ``chunk_count``, ``duplicate``
        (whether the content node already existed before this call) and
        ``captured_at``.
    """
    # Normalise the inputs before anything is hashed or stored.
    source_type = str(source_type or "text")
    text = str(text or "")
    title = _clean_text(str(title or source_uri or source_type))[:240] or source_type
    captured_at = captured_at or _now()
    content_hash = _sha256_text(f"{source_type}|{source_uri or ''}|{text}")
    content_id = f"webdoc:{content_hash[:24]}"
    full_text = f"{title}\n{text}"
    node_meta = {
        "source_type": source_type,
        "source_uri": source_uri,
        "content_hash": content_hash,
        "title": title,
        "captured_at": captured_at,
        "modified_at": modified_at,
        "owner": owner,
        "workspace_id": workspace_id,
        "permissions": permissions or {},
        "chars": len(text),
        # Unpacked last: caller metadata overrides the keys above.
        **(metadata or {}),
    }
    # Extraction runs on title + text, before the connection is opened.
    concepts = _extract_concepts(full_text, limit=15)
    triples = _extract_triples(full_text, concepts)
    chunk_ids: List[str] = []

    with self._connect() as conn:
        # Must be read before the upsert below creates the node.
        duplicate = self._node_exists(conn, content_id)
        # -- Content node (vertex: noun, the document) --
        self._upsert_node(
            conn, content_id, "Document", title,
            summary=(text or title)[:500],
            metadata=node_meta, raw=node_meta,
        )
        # -- SOURCE node + indexed_from edge (origin tracking) --
        source_node_id = self._attach_source_node(
            conn, content_id,
            source_type=source_type, source_uri=source_uri, title=title,
            content_hash=content_hash, captured_at=captured_at,
            extra={"owner": owner, "workspace_id": workspace_id},
        )
        # -- Owner (Person) + verb-style edge; the label means "uploaded" --
        if owner:
            person_id = f"person:{_slug(owner)}"
            self._upsert_node(conn, person_id, "Person", owner, metadata={"email": owner})
            self._upsert_edge(conn, person_id, content_id, "업로드함", weight=1.0)
        # -- Conversation link; the label means "mentioned" --
        if conversation_id:
            conv_id = f"conversation:{_slug(conversation_id)}"
            self._upsert_node(conn, conv_id, "Chat", conversation_id)
            self._upsert_edge(conn, conv_id, content_id, "언급함", weight=0.8)
        # -- RAG chunks; the edge label means "contains" --
        for index, chunk in enumerate(_chunks(text)):
            # The chunk id depends on the content id, position and chunk text.
            chunk_id = f"chunk:{_sha256_text(f'{content_id}:{index}:{chunk}')[:24]}"
            chunk_ids.append(chunk_id)
            self._upsert_node(
                conn, chunk_id, "Chunk", f"{title} chunk {index + 1}",
                summary=chunk[:500], metadata={"index": index, "source_node": content_id},
            )
            self._upsert_chunk(conn, chunk_id=chunk_id, source_node=content_id,
                               text=chunk, metadata={"index": index, "source_node": content_id})
            self._upsert_edge(conn, content_id, chunk_id, "포함함")
        # -- Concept / Feature / Error / Code nodes + edges --
        # Maps the lower-cased concept text to its node id for the triple pass.
        concept_ids: Dict[str, str] = {}
        for concept in concepts:
            node_t = _classify_node_type(concept, full_text)
            cid = f"{node_t.lower()}:{_slug(concept)}"
            concept_ids[concept.lower()] = cid
            self._upsert_node(conn, cid, node_t, concept,
                              metadata={"auto_extracted": True, "source_type": source_type})
            self._upsert_edge(conn, content_id, cid, "포함함", weight=0.8)
        for triple in triples:
            subj_id = concept_ids.get(triple["subject"].lower())
            obj_id = concept_ids.get(triple["object"].lower())
            # Only link triples whose two ends are distinct, known concepts.
            if subj_id and obj_id and subj_id != obj_id:
                self._upsert_edge(conn, subj_id, obj_id, triple["relation"],
                                  weight=1.0, metadata={"context": triple.get("context", "")[:240]})
        # -- Task / Decision nodes --
        for item in _semantic_items(text):
            sem_type = item["type"]
            sem_title = item["title"]
            sem_id = f"{sem_type.lower()}:{_sha256_text(f'{content_id}:{sem_type}:{sem_title}')[:24]}"
            self._upsert_node(conn, sem_id, sem_type, sem_title, summary=item["summary"],
                              metadata={"auto_extracted": True, "source_node": content_id}, raw=item)
            self._upsert_edge(conn, content_id, sem_id, "포함함", weight=0.9)

    return {
        "node_id": content_id,
        "type": "Document",
        "source_node_id": source_node_id,
        "content_hash": content_hash,
        "chunk_ids": chunk_ids,
        "chunk_count": len(chunk_ids),
        "duplicate": duplicate,
        "captured_at": captured_at,
    }
3096
+
3097
def record_provenance(
    self,
    *,
    node_id: str,
    source_type: str,
    pipeline: str = "unified-ingestion",
    source_uri: Optional[str] = None,
    content_hash: Optional[str] = None,
    title: Optional[str] = None,
    owner: Optional[str] = None,
    workspace_id: Optional[str] = None,
    captured_at: Optional[str] = None,
    modified_at: Optional[str] = None,
    embedded: bool = False,
    linked: bool = False,
    duplicate: bool = False,
    agent_used: Optional[str] = None,
    chunk_count: int = 0,
    permissions: Optional[Dict[str, Any]] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Write one provenance record for an ingested node.

    The record id is ``"prov:"`` plus the first 24 characters of the hash of
    ``"<node_id>|<content_hash>|<timestamp>"``, with the timestamp taken from
    ``_now()`` and also stored as ``created_at``. The statement is
    ``INSERT OR REPLACE``, so a record whose derived id matches an existing
    row overwrites that row.

    Returns:
        A dict with the record ``id``, the ``node_id`` and ``created_at``.
    """
    stamped_at = _now()
    digest = _sha256_text(f"{node_id}|{content_hash or ''}|{stamped_at}")
    prov_id = f"prov:{digest[:24]}"

    # Boolean flags are stored as 0/1 integers; JSON columns as text.
    values = (
        prov_id,
        node_id,
        source_type,
        source_uri,
        content_hash,
        title,
        pipeline,
        owner,
        workspace_id,
        captured_at,
        modified_at,
        int(bool(embedded)),
        int(bool(linked)),
        int(bool(duplicate)),
        agent_used,
        int(chunk_count or 0),
        _json(permissions or {}),
        _json(metadata or {}),
        stamped_at,
    )
    with self._connect() as conn:
        conn.execute(
            """
            INSERT OR REPLACE INTO ingestion_provenance(
            id, node_id, source_type, source_uri, content_hash, title, pipeline,
            owner, workspace_id, captured_at, modified_at, embedded, linked,
            duplicate, agent_used, chunk_count, permissions_json, metadata_json, created_at)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """,
            values,
        )
    return {"id": prov_id, "node_id": node_id, "created_at": stamped_at}
3139
+
3140
@staticmethod
def _provenance_row(row: sqlite3.Row) -> Dict[str, Any]:
    """Convert an ``ingestion_provenance`` row into a plain dict.

    Plain columns are copied as-is, the ``embedded`` / ``linked`` /
    ``duplicate`` flags are turned into booleans, and the two ``*_json``
    columns are decoded with ``_safe_loads`` and exposed as ``permissions``
    and ``metadata``.
    """
    copied_columns = (
        "id",
        "node_id",
        "source_type",
        "source_uri",
        "content_hash",
        "title",
        "pipeline",
        "owner",
        "workspace_id",
        "captured_at",
        "modified_at",
    )
    record: Dict[str, Any] = {column: row[column] for column in copied_columns}
    for flag in ("embedded", "linked", "duplicate"):
        record[flag] = bool(row[flag])
    record["agent_used"] = row["agent_used"]
    record["chunk_count"] = row["chunk_count"]
    record["permissions"] = _safe_loads(row["permissions_json"])
    record["metadata"] = _safe_loads(row["metadata_json"])
    record["created_at"] = row["created_at"]
    return record
3163
+
3164
def get_provenance(self, node_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the newest provenance record stored for ``node_id``.

    Records are ordered by ``created_at`` and then ``rowid``, both descending.

    Returns:
        The record converted by ``_provenance_row``, or ``None`` when the node
        has no provenance rows.
    """
    query = (
        "SELECT * FROM ingestion_provenance WHERE node_id = ? "
        "ORDER BY created_at DESC, rowid DESC LIMIT 1"
    )
    with self._connect() as conn:
        latest = conn.execute(query, (node_id,)).fetchone()
    if not latest:
        return None
    return self._provenance_row(latest)
3173
+
3174
def list_provenance(self, *, limit: int = 100, source_type: Optional[str] = None) -> Dict[str, Any]:
    """List provenance records, newest first.

    Args:
        limit: Maximum number of records; a falsy value means 100 and the
            result is clamped to the range 1..1000.
        source_type: When given, only records of this source type are listed.

    Returns:
        ``{"items": [...], "count": n}`` where each item is produced by
        ``_provenance_row`` and ``count`` is the number of rows fetched.
    """
    limit = min(max(int(limit or 100), 1), 1000)
    if source_type:
        query = (
            "SELECT * FROM ingestion_provenance WHERE source_type = ? "
            "ORDER BY created_at DESC, rowid DESC LIMIT ?"
        )
        params: tuple = (source_type, limit)
    else:
        query = (
            "SELECT * FROM ingestion_provenance "
            "ORDER BY created_at DESC, rowid DESC LIMIT ?"
        )
        params = (limit,)
    with self._connect() as conn:
        rows = conn.execute(query, params).fetchall()
    items = [self._provenance_row(record) for record in rows]
    return {"items": items, "count": len(rows)}
3191
+
3192
def provenance_stats(self) -> Dict[str, Any]:
    """Summarise the ``ingestion_provenance`` table.

    Returns:
        A dict with the ``total`` row count, per-``source_type`` counts under
        ``by_source_type``, the number of rows flagged ``embedded`` and
        ``duplicate`` (as ``duplicates``), and the greatest ``created_at``
        value as ``last_ingested_at`` (``None`` for an empty table).
    """
    with self._connect() as conn:

        def count(where: str = "") -> int:
            # Rows are read by column name, so the connection must yield
            # name-addressable rows.
            sql = f"SELECT COUNT(*) AS c FROM ingestion_provenance{where}"
            return conn.execute(sql).fetchone()["c"]

        total = count()
        by_source: Dict[str, int] = {}
        grouped = conn.execute(
            "SELECT source_type, COUNT(*) AS c FROM ingestion_provenance GROUP BY source_type"
        ).fetchall()
        for group in grouped:
            by_source[group["source_type"]] = group["c"]
        embedded = count(" WHERE embedded = 1")
        duplicates = count(" WHERE duplicate = 1")
        newest = conn.execute(
            "SELECT created_at FROM ingestion_provenance ORDER BY created_at DESC LIMIT 1"
        ).fetchone()

    last_ingested_at = newest["created_at"] if newest else None
    return {
        "total": total,
        "by_source_type": by_source,
        "embedded": embedded,
        "duplicates": duplicates,
        "last_ingested_at": last_ingested_at,
    }
3218
+
3219
+ # ── v3.6.0 portability: logical export / import + binary backup ──────────────
3220
def schema_versions(self) -> Dict[str, Any]:
    """Report the schema/projection versions and embedding dimension in use.

    ``EMBED_DIM`` and ``KG_SCHEMA_V2_VERSION`` are read from ``kg_schema``;
    if that import fails for any reason the values 1024 and 2 are used.
    """
    try:
        from kg_schema import EMBED_DIM as embed_dim, KG_SCHEMA_V2_VERSION as kg_v2_version
    except Exception:  # pragma: no cover - kg_schema always importable in practice
        embed_dim = 1024
        kg_v2_version = 2

    versions: Dict[str, Any] = {}
    versions["graph_schema_version"] = GRAPH_SCHEMA_VERSION
    versions["kg_v2_schema_version"] = kg_v2_version
    versions["projection_version"] = _PROJECTION_VERSION
    versions["embed_dim"] = embed_dim
    return versions
3232
+
3233
def export_graph_data(self) -> Dict[str, Any]:
    """Dump the graph tables as lists of plain row dicts.

    Exports ``nodes``, ``edges``, ``chunks``, ``knowledge_sources`` and the
    ``ingestion_provenance`` table (under the key ``provenance``). Vector
    embeddings are not part of the export. A ``counts`` entry holding the
    number of rows per exported key is added last.
    """
    # (export key, table name); only the provenance key differs from its table.
    exported_tables = (
        ("nodes", "nodes"),
        ("edges", "edges"),
        ("chunks", "chunks"),
        ("knowledge_sources", "knowledge_sources"),
        ("provenance", "ingestion_provenance"),
    )
    data: Dict[str, Any] = {}
    with self._connect() as conn:
        for key, table in exported_tables:
            fetched = conn.execute(f"SELECT * FROM {table}").fetchall()
            data[key] = [dict(record) for record in fetched]
    # Computed before "counts" is inserted, so it only covers the tables.
    data["counts"] = {key: len(records) for key, records in data.items()}
    return data
3252
+
3253
def import_graph_data(
    self, data: Dict[str, Any], *, mode: str = "merge", dry_run: bool = False
) -> Dict[str, Any]:
    """Import a logical export back into the store.

    Nodes, chunks and edges are written through the store's upsert helpers;
    knowledge sources and provenance rows use ``INSERT OR REPLACE``.

    Args:
        data: Export payload with optional ``nodes``, ``edges``, ``chunks``,
            ``knowledge_sources``, ``provenance`` and ``header`` entries.
        mode: ``"replace"`` calls ``clear_all()`` before importing; any other
            value imports on top of the existing data.
        dry_run: When true, return the plan without writing anything.

    Returns:
        The plan dict (``mode`` plus per-section item counts), with
        ``dry_run`` or ``imported`` set to ``True``.

    Raises:
        ValueError: If the header's integer ``graph_schema_version`` is newer
            than ``GRAPH_SCHEMA_VERSION``.
    """
    nodes = data.get("nodes") or []
    edges = data.get("edges") or []
    chunks = data.get("chunks") or []
    sources = data.get("knowledge_sources") or []
    provenance = data.get("provenance") or []

    header = data.get("header") or {}
    incoming_schema = header.get("graph_schema_version")
    if isinstance(incoming_schema, int) and incoming_schema > GRAPH_SCHEMA_VERSION:
        raise ValueError(
            f"Artifact graph_schema_version {incoming_schema} is newer than this "
            f"build ({GRAPH_SCHEMA_VERSION}); refusing to import."
        )

    plan = {
        "mode": mode,
        "nodes": len(nodes),
        "edges": len(edges),
        "chunks": len(chunks),
        "knowledge_sources": len(sources),
        "provenance": len(provenance),
    }
    if dry_run:
        plan["dry_run"] = True
        return plan

    if mode == "replace":
        # NOTE(review): clear_all() runs before the import connection is
        # opened, so a failure below presumably leaves the store cleared;
        # confirm against clear_all().
        self.clear_all()

    with self._connect() as conn:
        # Nodes go first so chunks and edges can refer to them.
        for n in nodes:
            self._upsert_node(
                conn, n["id"], n["type"], n.get("title") or "",
                summary=n.get("summary") or "",
                metadata=_safe_loads(n.get("metadata_json")),
                raw=_safe_loads(n.get("raw_json")),
            )
        for c in chunks:
            self._upsert_chunk(
                conn, chunk_id=c["id"], source_node=c["source_node"],
                text=c.get("text") or "", metadata=_safe_loads(c.get("metadata_json")),
            )
        for e in edges:
            # Default to 1.0 only when the weight is missing; an exported
            # weight of 0.0 is a real value and must survive the round trip.
            raw_weight = e.get("weight")
            if raw_weight is None or raw_weight == "":
                weight = 1.0
            else:
                weight = float(raw_weight)
            self._upsert_edge(
                conn, e["from_node"], e["to_node"], e["type"],
                weight=weight,
                metadata=_safe_loads(e.get("metadata_json")),
            )
        for s in sources:
            conn.execute(
                """
                INSERT OR REPLACE INTO knowledge_sources(
                id, root_path, os_type, drive_id, label, status, include_ocr,
                watch_enabled, consent_json, created_at, updated_at, last_scanned_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    s["id"], s["root_path"], s["os_type"], s.get("drive_id"), s.get("label"),
                    s.get("status") or "active", int(s.get("include_ocr") or 0),
                    int(s.get("watch_enabled") or 0), s.get("consent_json") or "{}",
                    s.get("created_at") or _now(), s.get("updated_at") or _now(),
                    s.get("last_scanned_at"),
                ),
            )
        for p in provenance:
            conn.execute(
                """
                INSERT OR REPLACE INTO ingestion_provenance(
                id, node_id, source_type, source_uri, content_hash, title, pipeline,
                owner, workspace_id, captured_at, modified_at, embedded, linked,
                duplicate, agent_used, chunk_count, permissions_json, metadata_json, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    p["id"], p["node_id"], p["source_type"], p.get("source_uri"),
                    p.get("content_hash"), p.get("title"), p.get("pipeline") or "import",
                    p.get("owner"), p.get("workspace_id"), p.get("captured_at"),
                    p.get("modified_at"), int(p.get("embedded") or 0), int(p.get("linked") or 0),
                    int(p.get("duplicate") or 0), p.get("agent_used"), int(p.get("chunk_count") or 0),
                    p.get("permissions_json") or "{}", p.get("metadata_json") or "{}",
                    p.get("created_at") or _now(),
                ),
            )
    plan["imported"] = True
    return plan
3348
+
3349
def backup_database(self, dest_path) -> Path:
    """Write a standalone snapshot of the live DB to ``dest_path``.

    Runs a full WAL checkpoint and then ``VACUUM INTO`` on a connection from
    ``_connect()``. The snapshot is first written to a sibling
    ``<name>.partial`` file and only moved over ``dest_path`` once the vacuum
    has succeeded, so a failed backup leaves any previous snapshot at
    ``dest_path`` untouched.

    Args:
        dest_path: Target file path (string or path-like). Missing parent
            directories are created; an existing file is replaced.

    Returns:
        ``dest_path`` as a ``Path``.

    Raises:
        sqlite3.Error: If the checkpoint or the vacuum fails.
        OSError: If the directories or files cannot be created or replaced.
    """
    dest = Path(dest_path)
    dest.parent.mkdir(parents=True, exist_ok=True)
    staging = dest.with_name(dest.name + ".partial")
    if staging.exists():
        staging.unlink()  # VACUUM INTO requires the target to not exist

    succeeded = False
    conn = self._connect()
    try:
        # Drain the checkpoint result so no unfinished statement is left on
        # this connection when VACUUM starts.
        conn.execute("PRAGMA wal_checkpoint(FULL)").fetchall()
        conn.execute("VACUUM INTO ?", (str(staging),))
        succeeded = True
    finally:
        conn.close()
        if not succeeded and staging.exists():
            staging.unlink()  # do not leave a partial snapshot behind

    # Path.replace overwrites an existing destination.
    staging.replace(dest)
    return dest
3368
+
2857
3369
  def _ingest_structure_nodes(
2858
3370
  self,
2859
3371
  conn: sqlite3.Connection,
@@ -3044,6 +3556,13 @@ class KnowledgeGraphStore:
3044
3556
  "Feature", # 소프트웨어 기능
3045
3557
  "Task", # 할 일
3046
3558
  "Decision", # 결정 사항
3559
+ # v3.6.0 Knowledge Graph First — 1급 엔티티를 그래프에 노출
3560
+ "Source", # 수집 출처 (파일/URL/브라우저 탭/git)
3561
+ "Repository", # git 저장소
3562
+ "Meeting", # 회의
3563
+ "Organization", # 조직
3564
+ "Workflow", # 워크플로우
3565
+ "Agent", # 에이전트
3047
3566
  )
3048
3567
 
3049
3568
  def list_documents(self, limit: int = 200) -> Dict[str, Any]:
@@ -1,3 +1,3 @@
1
1
  """Lattice AI - modular server package."""
2
2
 
3
- __version__ = "3.4.1"
3
+ __version__ = "3.6.0"