dgk-lab-runtime 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,570 @@
1
"""dgk-lab-runtime — Lab notebook runtime utilities for Marimo notebooks in Obsidian vaults."""

__version__ = "0.1.0"

# Public API, grouped by concern. The order is kept stable so star-imports
# and generated documentation do not churn between releases.
__all__ = [
    # Runtime detection and guards
    "is_pyodide_runtime",
    "lab_runtime_context",
    "require_local_runtime",
    # Dataset lookup and loading (works locally and in Pyodide)
    "normalize_dataset_path",
    "dataset_candidate_paths",
    "read_lab_json",
    "load_lab_manifest",
    "get_lab_dataset",
    "read_lab_dataset",
    # Local-only vault writes and secrets
    "local_vault_path",
    "write_local_json_snapshot",
    "write_local_dataframe_snapshot",
    "write_local_markdown_note",
    "get_local_secret",
    # Text cleanup and provenance
    "clean_lab_text",
    "fingerprint_data",
    "with_data_provenance",
    # Local-only readers, feeds, scraping and OCR
    "read_local_text_file",
    "read_local_bytes_file",
    "parse_feed_xml",
    "fetch_local_feed",
    "fetch_local_url_text",
    "scrape_local_page_text",
    "extract_local_image_text",
]
31
+
32
+
33
def is_pyodide_runtime() -> bool:
    """Report whether the notebook is running packaged inside Pyodide/WASM.

    Detection is done by attempting to import the ``pyodide`` package: the
    import succeeding means Pyodide, an ``ImportError`` means a local run.
    """
    try:
        import pyodide  # type: ignore  # noqa: F401
    except ImportError:
        detected = False
    else:
        detected = True
    return detected
40
+
41
+
42
def lab_runtime_context(notebooks_path: str = "lab"):
    """Describe the current notebook mode for cells that need a local fallback.

    Args:
        notebooks_path: Notebooks path used when the ``VAULT_NOTEBOOKS_PATH``
            environment variable is not set.

    Returns:
        A dict with the runtime name (``"pyodide"`` or ``"local"``), the
        ``isPackaged`` / ``isLocal`` / ``canRunLocalEtl`` flags, a
        ``capabilities`` mapping (every capability is available only in local
        mode), the resolved ``notebooksPath`` and the current working
        directory (empty string when packaged).
    """
    import os as _os

    in_wasm = is_pyodide_runtime()
    is_local = not in_wasm
    capability_names = (
        "filesystem",
        "secrets",
        "subprocess",
        "headlessBrowser",
        "ocr",
        "binaryFormats",
    )
    context = {
        "runtime": "local" if is_local else "pyodide",
        "isPackaged": in_wasm,
        "isLocal": is_local,
        "canRunLocalEtl": is_local,
        # Every capability shares the same availability: local mode only.
        "capabilities": dict.fromkeys(capability_names, is_local),
        "notebooksPath": _os.environ.get("VAULT_NOTEBOOKS_PATH", notebooks_path),
        "cwd": _os.getcwd() if is_local else "",
    }
    return context
65
+
66
+
67
def require_local_runtime(operation: str = "esta operação"):
    """Block operations that must not run inside the packaged HTML notebook.

    Args:
        operation: Human-readable name of the operation, used as the start
            of the error message.

    Returns:
        The runtime context dict from ``lab_runtime_context()``.

    Raises:
        RuntimeError: If the notebook is not running in local mode.
    """
    context = lab_runtime_context()
    if context["isLocal"]:
        return context
    raise RuntimeError(
        f"{operation} só pode rodar no modo local do notebook, antes do export."
    )
75
+
76
+
77
def normalize_dataset_path(path_or_url: str) -> str:
    """Normalize a Lab asset path to its canonical form.

    - surrounding whitespace is stripped
    - ``http://`` / ``https://`` URLs are otherwise returned untouched
    - Windows backslashes become forward slashes
    - every leading ``/``, ``./`` and ``assets/`` prefix is removed, in any
      order and any number of times

    Args:
        path_or_url: Relative asset path or absolute HTTP(S) URL.

    Returns:
        The canonical relative path (possibly empty) or the stripped URL.
    """
    value = path_or_url.strip()
    if value.startswith(("http://", "https://")):
        return value

    value = value.replace("\\", "/")
    # Prefixes can be stacked in any order ("/./assets//x"), so keep peeling
    # until a full pass leaves the value unchanged.
    while True:
        peeled = value.lstrip("/").removeprefix("./").removeprefix("assets/")
        if peeled == value:
            return value
        value = peeled
93
+
94
+
95
def dataset_candidate_paths(path_or_url: str):
    """List the locations to try, in order, when loading a dataset.

    Args:
        path_or_url: Relative asset path or absolute HTTP(S) URL.

    Returns:
        A single-item list for URLs. Otherwise the normalized path (when
        non-empty) followed by the same path under ``assets/``.
    """
    canonical = normalize_dataset_path(path_or_url)
    if canonical.startswith(("http://", "https://")):
        return [canonical]

    paths = [canonical] if canonical else []
    if not canonical.startswith("assets/"):
        paths.append(f"assets/{canonical}")
    return paths
106
+
107
+
108
def _read_lab_json_runtime(candidates):
    """Load JSON from the first candidate that ``pyodide.http.open_url`` can read.

    Args:
        candidates: Iterable of locations to try in order.

    Returns:
        The parsed JSON of the first candidate that loads.

    Raises:
        ImportError: If ``pyodide`` is not importable.
        RuntimeError: If no candidate could be loaded; chained to the last
            failure when there was one.
    """
    import json as _json

    from pyodide.http import open_url  # type: ignore

    failure = None
    for location in candidates:
        try:
            payload = _json.loads(open_url(location).read())
        except Exception as exc:
            # Remember only the most recent failure and move on.
            failure = exc
        else:
            return payload

    if failure:
        raise RuntimeError(
            "Não foi possível carregar o recurso de datasets."
        ) from failure
    raise RuntimeError("Não foi possível carregar o recurso de datasets.")
126
+
127
+
128
def _read_lab_json_local(candidates, notebooks_path: str):
    """Load JSON from ``<cwd>/public/<notebooks path>/<candidate>``.

    Args:
        candidates: Iterable of relative paths to try in order.
        notebooks_path: Notebooks path used when the ``VAULT_NOTEBOOKS_PATH``
            environment variable is not set.

    Returns:
        The parsed JSON of the first candidate that can be opened and decoded.

    Raises:
        Exception: The last error hit while trying the candidates.
        RuntimeError: If there were no candidates at all.
    """
    import json as _json
    import os as _os

    base_dir = _os.environ.get("VAULT_NOTEBOOKS_PATH", notebooks_path)
    failure = None
    for relative in candidates:
        full_path = _os.path.join(_os.getcwd(), "public", base_dir, relative)
        try:
            with open(full_path, encoding="utf-8") as handle:
                parsed = _json.load(handle)
        except Exception as exc:
            failure = exc
        else:
            return parsed

    if failure:
        raise failure
    raise RuntimeError("Não foi possível carregar o recurso de datasets.")
146
+
147
+
148
def read_lab_json(path_or_url: str, notebooks_path: str = "lab"):
    """Load dataset JSON in Pyodide or in a local run.

    Absolute ``http(s)`` URLs are fetched with ``pyodide.http.open_url`` when
    Pyodide is importable (``urllib`` has no socket support there) and with
    ``urllib`` otherwise. Relative paths are first tried through the Pyodide
    loader, relative to the site directory. On any failure the local
    filesystem under ``public/<VAULT_NOTEBOOKS_PATH>/...`` is tried instead
    (CI / ``uv`` runs).

    Args:
        path_or_url: Relative asset path or absolute HTTP(S) URL.
        notebooks_path: Notebooks path used when ``VAULT_NOTEBOOKS_PATH`` is
            not set.

    Returns:
        The parsed JSON document.

    Raises:
        RuntimeError: If the path normalizes to an empty string.
        Exception: Whatever the local loader raises when every candidate
            fails. When the Pyodide loader was available and failed first,
            that failure is attached as ``__cause__``.
    """
    import json as _json

    normalized = normalize_dataset_path(path_or_url)
    if not normalized:
        raise RuntimeError("Não foi possível carregar o recurso de datasets.")

    if normalized.startswith(("http://", "https://")):
        try:
            from pyodide.http import open_url  # type: ignore
        except ImportError:
            from urllib.request import urlopen

            # Close the response deterministically instead of leaking it.
            with urlopen(normalized, timeout=15) as response:
                return _json.loads(response.read())
        return _json.loads(open_url(normalized).read())

    candidates = dataset_candidate_paths(normalized)
    try:
        return _read_lab_json_runtime(candidates)
    except Exception as exc:
        # Best-effort: fall through to the local filesystem, but keep the
        # error so it is not lost if the fallback fails too.
        runtime_error = exc

    try:
        return _read_lab_json_local(candidates, notebooks_path)
    except Exception as local_error:
        if isinstance(runtime_error, ImportError):
            # Plain local run: the missing pyodide import is just noise.
            raise
        raise local_error from runtime_error
172
+
173
+
174
def load_lab_manifest(notebooks_path: str = "lab"):
    """Load the Lab dataset manifest (``datasets/manifest.json``).

    Args:
        notebooks_path: Notebooks path forwarded to ``read_lab_json``.

    Returns:
        The parsed manifest document.
    """
    manifest_location = "datasets/manifest.json"
    return read_lab_json(manifest_location, notebooks_path)
177
+
178
+
179
def get_lab_dataset(dataset_id: str, manifest=None, notebooks_path: str = "lab"):
    """Look up a manifest entry by id.

    Args:
        dataset_id: Value of the entry's ``"id"`` field.
        manifest: Already-loaded manifest mapping. Only ``None`` triggers a
            load from disk; an explicitly passed empty manifest is searched
            as-is.
        notebooks_path: Notebooks path forwarded to ``load_lab_manifest``.

    Returns:
        The first entry in ``manifest["datasets"]`` whose id matches.

    Raises:
        KeyError: If no entry declares ``dataset_id``.
    """
    if manifest is None:
        manifest = load_lab_manifest(notebooks_path)
    # ``or []`` tolerates a manifest whose "datasets" key is null.
    for dataset in manifest.get("datasets") or []:
        if dataset.get("id") == dataset_id:
            return dataset
    raise KeyError(f"Dataset não declarado no manifesto do Lab: {dataset_id}")
186
+
187
+
188
def read_lab_dataset(dataset_or_id, manifest=None, notebooks_path: str = "lab"):
    """Read a dataset declared in the manifest, locally or in the published HTML.

    Args:
        dataset_or_id: A dataset id (string) or an already-resolved manifest
            entry.
        manifest: Optional pre-loaded manifest, used when an id is given.
        notebooks_path: Notebooks path forwarded to the loaders.

    Returns:
        The parsed JSON found at the entry's ``assetPath``, ``path`` or
        ``url`` (first non-empty one, in that order).

    Raises:
        RuntimeError: If the entry declares none of those locations.
    """
    if isinstance(dataset_or_id, str):
        entry = get_lab_dataset(dataset_or_id, manifest, notebooks_path)
    else:
        entry = dataset_or_id

    for key in ("assetPath", "path", "url"):
        location = entry.get(key)
        if location:
            return read_lab_json(location, notebooks_path)

    raise RuntimeError(
        f"Dataset {entry.get('id', '<sem id>')} não possui assetPath, path ou url."
    )
201
+
202
+
203
def _safe_relative_path(relative_path: str) -> str:
    """Normalize a vault-relative path and reject ones that are empty or escape upward.

    Args:
        relative_path: Path relative to the vault root; backslashes and
            leading slashes are tolerated.

    Returns:
        The normalized path using forward slashes.

    Raises:
        RuntimeError: If the path is empty, resolves to ``.``, or starts
            with ``..`` after normalization.
    """
    import os as _os

    value = str(relative_path or "").replace("\\", "/").strip().lstrip("/")
    if not value:
        raise RuntimeError("Caminho de snapshot local inválido.")

    normalized = _os.path.normpath(value).replace("\\", "/")
    if normalized in (".", "..") or normalized.startswith("../"):
        raise RuntimeError("Caminho de snapshot local inválido.")
    return normalized
211
+
212
+
213
def local_vault_path(relative_path: str):
    """Resolve a safe absolute path inside the local vault repository.

    The vault root is the working directory reported by the runtime context.

    Args:
        relative_path: Path relative to the vault root.

    Returns:
        The absolute target path.

    Raises:
        RuntimeError: If not running locally, if the relative path is
            invalid, or if the resolved target falls outside the vault root.
    """
    import os as _os

    context = require_local_runtime("resolver caminho local do vault")
    safe_relative = _safe_relative_path(relative_path)
    vault_root = _os.path.abspath(context["cwd"])
    resolved = _os.path.abspath(_os.path.join(vault_root, safe_relative))

    # Second line of defence after the purely lexical check above.
    inside_vault = _os.path.commonpath([vault_root, resolved]) == vault_root
    if not inside_vault:
        raise RuntimeError("Caminho de snapshot local sai do vault.")
    return resolved
224
+
225
+
226
def _local_write_result(relative_path: str, target: str):
    """Build the summary dict returned by the local write helpers.

    Args:
        relative_path: Vault-relative path as given by the caller.
        target: Absolute path of the file that was written.

    Returns:
        A dict with the absolute ``path``, the normalized ``relativePath``
        and the file size in ``bytes``.
    """
    import os as _os

    safe_relative = _safe_relative_path(relative_path)
    size_in_bytes = _os.path.getsize(target)
    return {"path": target, "relativePath": safe_relative, "bytes": size_in_bytes}
234
+
235
+
236
def write_local_json_snapshot(relative_path: str, payload, *, indent: int = 2):
    """Write a version-controllable JSON snapshot into the local vault.

    Intended for Extract steps that need the filesystem, binaries, a browser,
    authenticated network access or anything else unavailable in the
    published HTML/WASM notebook.

    Args:
        relative_path: Destination path relative to the vault root.
        payload: JSON-serializable data.
        indent: Indentation passed to ``json.dump``.

    Returns:
        The write summary dict (``path``, ``relativePath``, ``bytes``).
    """
    import json as _json
    import os as _os

    target = local_vault_path(relative_path)
    parent_dir = _os.path.dirname(target)
    _os.makedirs(parent_dir, exist_ok=True)

    with open(target, "w", encoding="utf-8") as handle:
        _json.dump(payload, handle, ensure_ascii=False, indent=indent)
        # Trailing newline keeps diffs and POSIX tools happy.
        handle.write("\n")

    return _local_write_result(relative_path, target)
251
+
252
+
253
def write_local_dataframe_snapshot(dataframe, relative_path: str, *, format: str = None):
    """Write a DataFrame locally as CSV, JSON or Parquet.

    Parquet is optional: it only works when ``pyarrow`` or a compatible
    engine is installed locally. The published HTML should consume snapshots
    that were already generated, never try to write files.

    Args:
        dataframe: Object exposing ``to_csv`` / ``to_json`` / ``to_parquet``.
        relative_path: Destination path relative to the vault root.
        format: ``"csv"``, ``"json"`` or ``"parquet"`` (case-insensitive).
            When omitted, the file extension of the target decides.

    Returns:
        The write summary dict (``path``, ``relativePath``, ``bytes``).

    Raises:
        RuntimeError: If the resolved format is not supported.
    """
    import os as _os

    target = local_vault_path(relative_path)
    _os.makedirs(_os.path.dirname(target), exist_ok=True)

    extension = _os.path.splitext(target)[1].lstrip(".")
    kind = (format or extension).lower()

    if kind not in ("csv", "json", "parquet"):
        raise RuntimeError("Formato de snapshot tabular suportado: csv, json ou parquet.")

    if kind == "csv":
        dataframe.to_csv(target, index=False)
    elif kind == "parquet":
        dataframe.to_parquet(target, index=False)
    else:
        dataframe.to_json(target, orient="records", force_ascii=False, indent=2)
        # Append the final newline after the JSON export.
        with open(target, "a", encoding="utf-8") as handle:
            handle.write("\n")

    return _local_write_result(relative_path, target)
278
+
279
+
280
def write_local_markdown_note(relative_path: str, body: str, *, frontmatter=None):
    """Write a local Markdown note for Obsidian, Bases and Dataview.

    Use it when a Lab analysis should become a curated artifact in the
    vault. The published HTML never writes notes; it only consumes snapshots
    and notes that are already versioned.

    Frontmatter keys are emitted in sorted order. ``lab_generated: true`` and
    ``status: rascunho`` are added unless already present. String values that
    are not safe as YAML plain scalars (for example ``Foo: bar``,
    ``[[Link]]``, ``#tag`` or an empty string) are written as double-quoted
    scalars so the frontmatter stays parseable; plain values are unchanged.

    Args:
        relative_path: Destination path relative to the vault root; must end
            in ``.md``.
        body: Markdown body; trailing whitespace is stripped.
        frontmatter: Optional mapping of metadata. List/tuple values become
            YAML sequences, everything else a single scalar.

    Returns:
        The write summary dict (``path``, ``relativePath``, ``bytes``).

    Raises:
        RuntimeError: If the target does not use the ``.md`` extension.
    """
    import json as _json
    import os as _os

    target = local_vault_path(relative_path)
    if not target.endswith(".md"):
        raise RuntimeError("Notas geradas pelo Lab devem usar extensão .md.")

    _os.makedirs(_os.path.dirname(target), exist_ok=True)
    metadata = dict(frontmatter or {})
    metadata.setdefault("lab_generated", True)
    metadata.setdefault("status", "rascunho")

    def _needs_quoting(text):
        """Return True when *text* cannot be written as a YAML plain scalar."""
        if not text or text != text.strip():
            return True
        # Characters that start another YAML construct when leading.
        if text[0] in "!&*[]{}|>@`\"'%,#":
            return True
        # "-", "?" and ":" are indicators only when followed by a space.
        if text in ("-", "?", ":") or text[:2] in ("- ", "? ", ": "):
            return True
        # Mapping separator or comment start inside the value.
        if ": " in text or " #" in text or text.endswith(":"):
            return True
        return any(ord(char) < 0x20 or char == "\x7f" for char in text)

    def _yaml_scalar(value):
        # bool must be tested before the numeric check: bool subclasses int.
        if isinstance(value, bool):
            return "true" if value else "false"
        if value is None:
            return "null"
        if isinstance(value, (int, float)):
            return str(value)
        text = str(value).replace("\n", " ")
        if _needs_quoting(text):
            # A JSON string literal is a valid YAML double-quoted scalar.
            return _json.dumps(text, ensure_ascii=False)
        return text

    lines = ["---"]
    for key in sorted(metadata):
        value = metadata[key]
        if isinstance(value, (list, tuple)):
            lines.append(f"{key}:")
            lines.extend(f"  - {_yaml_scalar(item)}" for item in value)
        else:
            lines.append(f"{key}: {_yaml_scalar(value)}")
    lines.extend(["---", "", str(body).rstrip(), ""])

    with open(target, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    return _local_write_result(relative_path, target)
318
+
319
+
320
def get_local_secret(name: str, default=None, *, required: bool = False):
    """Read a secret from the local environment, never from the published HTML.

    Args:
        name: Environment variable name.
        default: Value returned when the variable is not set.
        required: When true, a missing or empty value is an error.

    Returns:
        The environment value, or ``default`` when the variable is unset.

    Raises:
        RuntimeError: If not running locally, or if ``required`` is set and
            the resolved value is falsy.
    """
    import os as _os

    require_local_runtime(f"ler segredo local {name}")
    secret = _os.environ.get(name, default)
    if secret or not required:
        return secret
    raise RuntimeError(f"Segredo local ausente: {name}")
329
+
330
+
331
def clean_lab_text(text, *, lower: bool = False) -> str:
    """Normalize raw text coming from scraping, OCR, files or APIs.

    Line breaks, form feeds and every run of whitespace collapse to a single
    space, and the result is trimmed. Falsy input yields an empty string.

    Args:
        text: Any value; it is converted with ``str`` when truthy.
        lower: When true, the result is lower-cased.

    Returns:
        The cleaned single-line text.
    """
    import re as _re

    raw = str(text or "")
    without_breaks = _re.sub(r"[\n\x0c\r]+", " ", raw)
    collapsed = _re.sub(r"\s+", " ", without_breaks).strip()
    if lower:
        return collapsed.lower()
    return collapsed
338
+
339
+
340
def fingerprint_data(payload) -> str:
    """Compute a stable SHA-256 fingerprint for a JSON-serializable payload.

    The payload is serialized with sorted keys and without ASCII escaping,
    then hashed as UTF-8, so dict key order does not change the result.

    Args:
        payload: Any JSON-serializable value.

    Returns:
        The hexadecimal SHA-256 digest.
    """
    import hashlib as _hashlib
    import json as _json

    canonical = _json.dumps(payload, ensure_ascii=False, sort_keys=True)
    digest = _hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
347
+
348
+
349
def with_data_provenance(
    payload,
    *,
    source: str,
    license: str = "verificar",
    privacy: str = "private-until-published",
    collected_at: str = None,
):
    """Wrap a payload with the minimal provenance metadata of a Lab snapshot.

    Args:
        payload: The data to wrap; stored under ``"data"``.
        source: Where the data came from.
        license: License label for the data.
        privacy: Privacy label for the data.
        collected_at: Collection timestamp; when falsy, the current UTC time
            in ISO format with a ``Z`` suffix is used.

    Returns:
        A dict with ``schemaVersion``, ``source``, ``collectedAt``,
        ``license``, ``privacy``, ``data`` and a ``sha256`` fingerprint
        computed over the other fields.
    """
    from datetime import datetime as _datetime
    from datetime import timezone as _timezone

    if collected_at:
        timestamp = collected_at
    else:
        now_utc = _datetime.now(_timezone.utc)
        timestamp = now_utc.isoformat().replace("+00:00", "Z")

    record = {
        "schemaVersion": 1,
        "source": source,
        "collectedAt": timestamp,
        "license": license,
        "privacy": privacy,
        "data": payload,
    }
    # The fingerprint is taken before the "sha256" key exists in the record.
    record["sha256"] = fingerprint_data(record)
    return record
372
+
373
+
374
def read_local_text_file(relative_path: str, *, encoding: str = "utf-8"):
    """Read a local text file from inside the vault.

    Args:
        relative_path: Path relative to the vault root.
        encoding: Text encoding used to decode the file.

    Returns:
        The full file contents as a string.
    """
    target = local_vault_path(relative_path)
    with open(target, encoding=encoding) as handle:
        content = handle.read()
    return content
378
+
379
+
380
def read_local_bytes_file(relative_path: str):
    """Read a local binary file from inside the vault.

    Args:
        relative_path: Path relative to the vault root.

    Returns:
        The full file contents as bytes.
    """
    target = local_vault_path(relative_path)
    with open(target, "rb") as handle:
        content = handle.read()
    return content
384
+
385
+
386
def _xml_child_text(element, names):
    """Return the cleaned text of the first direct child that has text.

    Args:
        element: Parent XML element.
        names: Child tag names to try, in priority order.

    Returns:
        The cleaned text of the first matching child with non-empty text,
        or ``None`` when none qualifies.
    """
    children = (element.find(name) for name in names)
    for child in children:
        if child is None or not child.text:
            continue
        return clean_lab_text(child.text)
    return None
392
+
393
+
394
def _xml_atom_link(element):
    """Pick the best ``href`` among an Atom element's ``<link>`` children.

    Links whose ``rel`` is ``alternate`` (also the default when ``rel`` is
    absent) or empty win; otherwise the first link's ``href`` is used.

    Args:
        element: An Atom entry or feed element.

    Returns:
        The chosen URL, or ``None`` when there is no usable link.
    """
    links = element.findall("{http://www.w3.org/2005/Atom}link")
    for link in links:
        href = link.attrib.get("href")
        if href and link.attrib.get("rel", "alternate") in {"alternate", ""}:
            return href
    # No alternate link: fall back to whatever the first link offers.
    return links[0].attrib.get("href") if links else None
402
+
403
+
404
def parse_feed_xml(xml_text: str, *, source_url: str = None, limit: int = 50):
    """Convert an RSS or Atom document into small, version-friendly records.

    A root with a ``<channel>`` child is treated as RSS; anything else is
    read as Atom. For RSS items the publication date falls back from
    ``pubDate`` to the Dublin Core ``dc:date`` element (and, as before, to an
    un-namespaced ``date``).

    Args:
        xml_text: The feed document.
        source_url: Stored as-is under ``"source"``.
        limit: Maximum number of items/entries to keep.

    Returns:
        A dict with ``schemaVersion``, ``kind``, ``format`` (``"rss"`` or
        ``"atom"``), ``source``, ``title``, ``itemCount`` and ``items``. Each
        item has ``title``, ``url``, ``published``, ``updated``, ``summary``
        and ``guid`` (``None`` when absent).

    Raises:
        xml.etree.ElementTree.ParseError: If the document is not well-formed.
    """
    import xml.etree.ElementTree as _ET

    atom = "{http://www.w3.org/2005/Atom}"
    # ElementTree matches namespaced tags only by their {uri}local name, so a
    # bare "date" never finds <dc:date>.
    dublin_core = "{http://purl.org/dc/elements/1.1/}"

    def _feed(feed_format, title, items):
        # Single place that defines the envelope shared by both formats.
        return {
            "schemaVersion": 1,
            "kind": "feed",
            "format": feed_format,
            "source": source_url,
            "title": title,
            "itemCount": len(items),
            "items": items,
        }

    # NOTE(security): xml.etree.ElementTree is documented as not secure
    # against maliciously constructed data; feeds are remote input, so only
    # parse sources you trust or pre-limit the document size.
    root = _ET.fromstring(xml_text)

    channel = root.find("channel")
    if channel is not None:
        rss_items = [
            {
                "title": _xml_child_text(item, ["title"]),
                "url": _xml_child_text(item, ["link"]),
                "published": _xml_child_text(
                    item, ["pubDate", f"{dublin_core}date", "date"]
                ),
                "updated": _xml_child_text(item, ["updated"]),
                "summary": _xml_child_text(item, ["description", "summary"]),
                "guid": _xml_child_text(item, ["guid", "id"]),
            }
            for item in channel.findall("item")[:limit]
        ]
        return _feed("rss", _xml_child_text(channel, ["title"]), rss_items)

    atom_entries = [
        {
            "title": _xml_child_text(entry, [f"{atom}title", "title"]),
            "url": _xml_atom_link(entry),
            "published": _xml_child_text(entry, [f"{atom}published", "published"]),
            "updated": _xml_child_text(entry, [f"{atom}updated", "updated"]),
            "summary": _xml_child_text(
                entry, [f"{atom}summary", f"{atom}content", "summary"]
            ),
            "guid": _xml_child_text(entry, [f"{atom}id", "id"]),
        }
        for entry in root.findall(f"{atom}entry")[:limit]
    ]
    return _feed("atom", _xml_child_text(root, [f"{atom}title", "title"]), atom_entries)
458
+
459
+
460
def fetch_local_feed(
    url: str,
    *,
    timeout: int = 20,
    user_agent: str = "vault-seed-lab/1.0",
    limit: int = 50,
):
    """Download and normalize an RSS/Atom feed in the local environment.

    Args:
        url: Feed URL.
        timeout: Timeout in seconds passed to ``urlopen``.
        user_agent: Value of the ``User-Agent`` request header.
        limit: Maximum number of items to keep.

    Returns:
        The normalized feed dict produced by ``parse_feed_xml``.

    Raises:
        RuntimeError: If not running locally.
    """
    from urllib.request import Request as _Request
    from urllib.request import urlopen as _urlopen

    require_local_runtime("coletar feed RSS/Atom localmente")

    headers = {"User-Agent": user_agent}
    with _urlopen(_Request(url, headers=headers), timeout=timeout) as response:
        raw = response.read()
        # Fall back to UTF-8 when the server does not declare a charset.
        charset = response.headers.get_content_charset() or "utf-8"
        xml_text = raw.decode(charset, "replace")

    return parse_feed_xml(xml_text, source_url=url, limit=limit)
470
+
471
+
472
def fetch_local_url_text(url: str, *, timeout: int = 20, user_agent: str = "vault-seed-lab/1.0"):
    """Fetch a URL locally and extract its text using only the standard library.

    Text inside ``<script>``, ``<style>`` and ``<noscript>`` is skipped.
    Title text is collected separately and is also part of the page text.

    Args:
        url: Page URL.
        timeout: Timeout in seconds passed to ``urlopen``.
        user_agent: Value of the ``User-Agent`` request header.

    Returns:
        A dict with ``url``, ``title`` (``None`` when empty), the cleaned
        ``text``, a 500-character ``textPreview`` and up to 50 ``links``
        taken from ``href`` attributes in the raw HTML.

    Raises:
        RuntimeError: If not running locally.
    """
    import re as _re
    from html.parser import HTMLParser as _HTMLParser
    from urllib.request import Request as _Request
    from urllib.request import urlopen as _urlopen

    require_local_runtime("extrair página web localmente")

    skipped_tags = {"script", "style", "noscript"}

    class _TextParser(_HTMLParser):
        """Collects visible text and title text while feeding HTML."""

        def __init__(self):
            super().__init__()
            self.title_parts = []
            self.text_parts = []
            self.inside_title = False
            # Depth counter, so nested skipped tags are handled.
            self.skip_depth = 0

        def handle_starttag(self, tag, attrs):
            if tag in skipped_tags:
                self.skip_depth += 1
            elif tag == "title":
                self.inside_title = True

        def handle_endtag(self, tag):
            if tag == "title":
                self.inside_title = False
            elif tag in skipped_tags and self.skip_depth:
                self.skip_depth -= 1

        def handle_data(self, data):
            if self.skip_depth:
                return
            if self.inside_title:
                self.title_parts.append(data)
            self.text_parts.append(data)

    headers = {"User-Agent": user_agent}
    with _urlopen(_Request(url, headers=headers), timeout=timeout) as response:
        raw = response.read()
        charset = response.headers.get_content_charset() or "utf-8"
        html = raw.decode(charset, "replace")

    parser = _TextParser()
    parser.feed(html)

    text = clean_lab_text(" ".join(parser.text_parts))
    title = clean_lab_text(" ".join(parser.title_parts)) or None
    links = _re.findall(r"href=[\"']([^\"']+)", html)[:50]
    return {
        "url": url,
        "title": title,
        "text": text,
        "textPreview": text[:500],
        "links": links,
    }
522
+
523
+
524
async def scrape_local_page_text(url: str, *, wait_until: str = "networkidle"):
    """Extract a dynamic page locally with Playwright, when it is installed.

    Args:
        url: Page URL.
        wait_until: Load state passed to ``page.goto``.

    Returns:
        A dict with ``url``, ``title``, the cleaned body ``text`` and a
        500-character ``textPreview``.

    Raises:
        RuntimeError: If not running locally or if Playwright is missing.
    """
    require_local_runtime("extrair página dinâmica com Playwright")
    try:
        from playwright.async_api import async_playwright as _async_playwright
    except ImportError as exc:
        raise RuntimeError(
            "Playwright não está instalado. Instale apenas no ambiente local quando precisar de scraping dinâmico."
        ) from exc

    async with _async_playwright() as playwright:
        browser = await playwright.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url, wait_until=wait_until)
            title = await page.title()
            text = await page.inner_text("body")
        finally:
            # Close the browser even when navigation or extraction fails.
            await browser.close()

    cleaned = clean_lab_text(text)
    return {"url": url, "title": title, "text": cleaned, "textPreview": cleaned[:500]}
544
+
545
+
546
def extract_local_image_text(image_input, *, languages: str = "por+eng"):
    """Run local OCR on a path, raw bytes, a PIL image or an image URL.

    Args:
        image_input: One of: an ``http(s)`` URL string; a vault-relative path
            (``str`` or ``os.PathLike``); raw image data (``bytes``,
            ``bytearray`` or ``memoryview``); or an object handed to
            pytesseract unchanged (e.g. an already-open PIL image).
        languages: Tesseract language spec passed as ``lang``.

    Returns:
        The recognized text, cleaned to a single line.

    Raises:
        RuntimeError: If not running locally, or if pillow/pytesseract are
            not installed.
    """
    import os as _os
    from io import BytesIO as _BytesIO
    from urllib.request import urlopen as _urlopen

    require_local_runtime("executar OCR local")
    try:
        import pytesseract as _pytesseract
        from PIL import Image as _Image
    except ImportError as exc:
        raise RuntimeError(
            "OCR local requer pillow e pytesseract instalados, além do binário tesseract."
        ) from exc

    # Only images opened here are closed here; a caller-provided image stays
    # under the caller's control.
    opened_here = True
    if isinstance(image_input, str) and image_input.startswith(("http://", "https://")):
        with _urlopen(image_input, timeout=20) as response:
            image = _Image.open(_BytesIO(response.read()))
    elif isinstance(image_input, (str, _os.PathLike)):
        image = _Image.open(local_vault_path(_os.fsdecode(image_input)))
    elif isinstance(image_input, (bytes, bytearray, memoryview)):
        image = _Image.open(_BytesIO(bytes(image_input)))
    else:
        image = image_input
        opened_here = False

    try:
        return clean_lab_text(_pytesseract.image_to_string(image, lang=languages))
    finally:
        if opened_here:
            image.close()
@@ -0,0 +1,80 @@
1
+ Metadata-Version: 2.4
2
+ Name: dgk-lab-runtime
3
+ Version: 0.1.0
4
+ Summary: Digital Gardening Kit — Lab notebook runtime utilities for Marimo notebooks in Obsidian vaults
5
+ Project-URL: Repository, https://github.com/aretw0/vault-seed
6
+ Project-URL: Bug Tracker, https://github.com/aretw0/vault-seed/issues
7
+ License: GPL-3.0-only
8
+ Keywords: digital-garden,marimo,notebook,obsidian,vault
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: GNU General Public License v3 (GPLv3)
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Requires-Python: >=3.11
17
+ Provides-Extra: ocr
18
+ Requires-Dist: pillow>=10.0; extra == 'ocr'
19
+ Requires-Dist: pytesseract>=0.3; extra == 'ocr'
20
+ Provides-Extra: scraping
21
+ Requires-Dist: playwright>=1.40; extra == 'scraping'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # dgk-lab-runtime
25
+
26
+ Lab notebook runtime utilities from the [Digital Gardening Kit](https://github.com/aretw0/vault-seed).
27
+
28
+ Designed for Marimo notebooks that live alongside an Obsidian vault — handles the runtime boundary between local ETL (filesystem, secrets, network) and the published HTML/WASM notebook. Works with any vault layout; the defaults follow the `vault-seed` conventions (`lab/` as the notebooks path, `public/lab/` as the dataset directory) which you can override via environment variables or function arguments.
29
+
30
+ ## Install
31
+
32
+ ```bash
33
+ pip install dgk-lab-runtime
34
+ # with scraping support (Playwright)
35
+ pip install "dgk-lab-runtime[scraping]"
36
+ # with OCR support (Tesseract)
37
+ pip install "dgk-lab-runtime[ocr]"
38
+ ```
39
+
40
+ ## Usage
41
+
42
+ ```python
43
+ from dgk_lab_runtime import (
44
+ lab_runtime_context,
45
+ read_lab_json,
46
+ load_lab_manifest,
47
+ read_lab_dataset,
48
+ write_local_json_snapshot,
49
+ fetch_local_feed,
50
+ fingerprint_data,
51
+ with_data_provenance,
52
+ )
53
+
54
+ ctx = lab_runtime_context()
55
+ # {"runtime": "local", "isPackaged": False, "capabilities": {...}, ...}
56
+
57
+ # Read a dataset from the Lab manifest (local or Pyodide/WASM)
58
+ data = read_lab_dataset("my-dataset")
59
+
60
+ # Write a versioned JSON snapshot to the vault
61
+ write_local_json_snapshot("40 - Recursos/data/snapshot.json", data)
62
+ ```
63
+
64
+ ## Configuration
65
+
66
+ | Environment variable | Default | Description |
67
+ |---|---|---|
68
+ | `VAULT_NOTEBOOKS_PATH` | `lab` | URL segment where notebooks are published |
69
+
70
+ ## Runtime boundary
71
+
72
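+ Every `local_*` / `*_local_*` function — the ones that resolve vault paths, write or read local files, read secrets, fetch feeds and pages, scrape, or run OCR — calls `require_local_runtime()` and raises `RuntimeError` when running inside a packaged HTML/WASM notebook. The dataset readers (`read_lab_json`, `load_lab_manifest`, `read_lab_dataset`) are the exception: they work in both modes so the published notebook can load its data. This boundary is intentional: ETL logic runs locally before export; the published notebook only reads pre-generated snapshots.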
73
+
74
+ ## Vault-seed compatibility
75
+
76
+ If you use [vault-seed](https://github.com/aretw0/vault-seed), the `_lab_notebook_runtime.py` shim in `99 - Meta e Anexos/Notebooks/` imports this package transparently when installed, so existing notebooks work unchanged. Without installation the shim uses an inline fallback — same API, no external dependency.
77
+
78
+ ## License
79
+
80
+ GPL-3.0-only — see [LICENSE.md](../../LICENSE.md) in the repository root.
@@ -0,0 +1,4 @@
1
+ dgk_lab_runtime/__init__.py,sha256=8DE5jzBRC-fMWQrbDe_LOYU3atEpwJEpABWoB1O_7qA,19992
2
+ dgk_lab_runtime-0.1.0.dist-info/METADATA,sha256=drFpoGNP93gUQLKzklzB0yq3ytjf3VKIJK-phNlWCs0,3219
3
+ dgk_lab_runtime-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
4
+ dgk_lab_runtime-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any