contextbase-plugin-chrome-local 0.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.3
2
+ Name: contextbase-plugin-chrome-local
3
+ Version: 0.2.4
4
+ Summary: Chrome local plugin for ContextBase
5
+ Author: Alizain Feerasta
6
+ Author-email: Alizain Feerasta <alizain.feerasta@gmail.com>
7
+ Requires-Dist: contextbase-shared-plugins==0.2.4
8
+ Requires-Dist: dagster==1.12.14
9
+ Requires-Dist: dagster-dlt==0.28.14
10
+ Requires-Dist: dlt>=1.26.0
11
+ Requires-Dist: pydantic>=2.12.0
12
+ Requires-Dist: sqlalchemy>=2.0.0
13
+ Requires-Python: >=3.14, <3.15
@@ -0,0 +1,17 @@
1
+ plugin_chrome_local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ plugin_chrome_local/binding_config.py,sha256=p_l7z-2oCMivd_8-H6T9uReOJUslfAsR8O6WBd7NHS0,1498
3
+ plugin_chrome_local/component.py,sha256=a542jdao0HU-BHgW6MbD2O0hPKnwgSurzAgiXqXxbxM,4063
4
+ plugin_chrome_local/defs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ plugin_chrome_local/defs/defs.yaml,sha256=IYKSc6UTq-426pzOmauAZbwFrMJJB1XplhDGIbEW850,61
6
+ plugin_chrome_local/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ plugin_chrome_local/models/base.py,sha256=Fp7n4EfxQOxzqm34-D-rYyu60P88eZUIewCBNP_Iuc4,119
8
+ plugin_chrome_local/models/ctx.py,sha256=ugppXE8Y2Cux9yz26AqzBLY58Q_kX8W7QZ910jhY1cQ,754
9
+ plugin_chrome_local/models/ingress.py,sha256=fU1WpaRUijJKF3cA4-sGXEmIt2uFYilcr6BIoxlAkck,1588
10
+ plugin_chrome_local/models/translators.py,sha256=Q70Xkb54ZB8T4lecklRDypxbKGlxuVlY_bDkPtt6JDs,2010
11
+ plugin_chrome_local/models/types.py,sha256=u2Y5SHZdK-_SFOg254UDtAD7tdZ5FjnbY6tsxjlp0ys,68
12
+ plugin_chrome_local/plugin.json,sha256=ntgpILrXGIkOnaeWMSgFv_DAS58MVKG4A3yfr3DkzDQ,85
13
+ plugin_chrome_local/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ plugin_chrome_local/sources/snapshot.py,sha256=kgVe6kx86vna-FCiEiAteaNnCjL2twy5grB8wU4TL8g,10477
15
+ contextbase_plugin_chrome_local-0.2.4.dist-info/WHEEL,sha256=i9aSRDivn5iP9LaR1BLQX2GNAuriQWPsFwbbWygTX2k,81
16
+ contextbase_plugin_chrome_local-0.2.4.dist-info/METADATA,sha256=y5jVuyAhKZjWQc0GLaU2nQckGbm5rYWNrF4TqxLRKEo,445
17
+ contextbase_plugin_chrome_local-0.2.4.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.11.15
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
File without changes
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from pydantic import Field, model_validator
6
+
7
+ from shared_plugins.bindings import BaseBindingConfigModel, NonEmptyText, ResolvedPath
8
+
9
+
10
class ChromeLocalDataDir(BaseBindingConfigModel):
    # One named Chromium data directory that may contain browser profiles.

    # Label for this data directory; profile discovery uses it as the prefix
    # of every profile name found underneath `path`.
    name: NonEmptyText
    # Location of the data directory on disk.
    path: ResolvedPath
13
+
14
+
15
class ChromeLocalBindingConfig(BaseBindingConfigModel):
    # Binding configuration: the Chromium data directories to snapshot.
    # When omitted, `data_dirs` defaults to the Chrome and Chrome Canary
    # directories under ~/Library/Application Support/Google.
    data_dirs: list[ChromeLocalDataDir] = Field(
        default_factory=lambda: [
            ChromeLocalDataDir(
                name=label,
                path=Path.home()
                / "Library"
                / "Application Support"
                / "Google"
                / folder,
            )
            for label, folder in (
                ("chrome", "Chrome"),
                ("chrome_canary", "Chrome Canary"),
            )
        ],
        min_length=1,
    )

    @model_validator(mode="after")
    def _validate_unique_names(self) -> ChromeLocalBindingConfig:
        # Reject configurations in which two data directories share a name.
        # Counting in a dict keeps the names in first-occurrence order, so the
        # error message lists duplicates in the order they were configured.
        occurrences: dict[str, int] = {}
        for entry in self.data_dirs:
            occurrences[entry.name] = occurrences.get(entry.name, 0) + 1

        repeated = [name for name, count in occurrences.items() if count > 1]
        if not repeated:
            return self

        duplicates = ", ".join(repeated)
        raise ValueError(
            f"data_dirs names must be unique; duplicates: {duplicates}."
        )
@@ -0,0 +1,120 @@
1
+ import dagster as dg
2
+ from dagster import AssetExecutionContext
3
+ from dagster_dlt import DagsterDltResource
4
+ from shared_plugins.automation import non_overlapping_automation_condition
5
+ from shared_plugins.bindings import (
6
+ parse_binding_config,
7
+ resolve_binding_models,
8
+ )
9
+ from shared_plugins.control_plane import ControlPlaneClient
10
+ from shared_plugins.dlt import resolve_partition_binding, run_dlt_pipeline
11
+ from shared_plugins.naming import (
12
+ dagster_asset_group_name,
13
+ dagster_asset_tags,
14
+ dagster_dlt_asset_key,
15
+ dagster_partition_def_name,
16
+ dagster_pool_name,
17
+ dlt_source_name,
18
+ plugin_id_from_module,
19
+ )
20
+ from shared_plugins.resources import DLT_RESOURCE
21
+
22
+ from .binding_config import ChromeLocalBindingConfig
23
+ from .sources.snapshot import SUPPORTED_MODEL_NAMES, chrome_local_snapshot_source
24
+
25
# Plugin identifier, computed by plugin_id_from_module from this module's __file__.
PLUGIN_ID = plugin_id_from_module(__file__)
# Job name of the snapshot pipeline; passed to run_dlt_pipeline as job_name.
SNAPSHOT_JOB = "snapshot"
# dlt source name for the snapshot job; used to build the Dagster asset keys.
SNAPSHOT_SOURCE_NAME = dlt_source_name(PLUGIN_ID, SNAPSHOT_JOB)
28
+
29
+
30
def _build_snapshot_specs(
    partitions_def: dg.PartitionsDefinition,
    automation_condition: dg.AutomationCondition,
) -> list[dg.AssetSpec]:
    """Return the asset specs for the snapshot job's "visit" and "bookmark" models.

    Both specs share the plugin's group name and tags and use the given
    partitions definition and automation condition; only the asset key differs.
    """
    common_kwargs = {
        "group_name": dagster_asset_group_name(PLUGIN_ID),
        "tags": dagster_asset_tags(PLUGIN_ID),
        "automation_condition": automation_condition,
        "partitions_def": partitions_def,
    }
    return [
        dg.AssetSpec(
            key=dagster_dlt_asset_key(SNAPSHOT_SOURCE_NAME, model_name),
            **common_kwargs,
        )
        for model_name in ("visit", "bookmark")
    ]
54
+
55
+
56
class ChromeLocalSyncComponent(dg.Component):
    """Dagster component exposing the Chrome local snapshot assets."""

    def build_defs(self, context: dg.ComponentLoadContext) -> dg.Definitions:
        """Build the snapshot multi-asset, its automation sensor and resources."""
        partition_name = dagster_partition_def_name(PLUGIN_ID)
        partitions_def = dg.DynamicPartitionsDefinition(name=partition_name)

        # Run when a partition is missing or on the 15-minute cron tick; the
        # shared helper wraps the condition before it is attached to the specs.
        trigger = dg.AutomationCondition.on_missing() | dg.AutomationCondition.on_cron(
            "*/15 * * * *"
        )
        snapshot_specs = _build_snapshot_specs(
            partitions_def=partitions_def,
            automation_condition=non_overlapping_automation_condition(trigger),
        )

        @dg.multi_asset(
            specs=snapshot_specs,
            can_subset=True,
            name="chrome_local_snapshot",
            pool=dagster_pool_name(PLUGIN_ID),
        )
        def chrome_local_snapshot_assets(
            context: AssetExecutionContext,
            dlt_resource: DagsterDltResource,
            control_plane: dg.ResourceParam[ControlPlaneClient],
        ):
            # Resolve the binding for the partition being materialized, then
            # parse its config and the set of models it has active.
            binding = resolve_partition_binding(
                context=context,
                control_plane=control_plane,
                plugin_id=PLUGIN_ID,
            )
            binding_id = str(binding.binding_id)
            binding_config = parse_binding_config(binding, ChromeLocalBindingConfig)
            active_models = resolve_binding_models(
                binding,
                supported_models=SUPPORTED_MODEL_NAMES,
                default_active=SUPPORTED_MODEL_NAMES,
            )

            snapshot_source = chrome_local_snapshot_source(
                binding_id,
                binding_config,
                binding_models=active_models,
            )
            yield from run_dlt_pipeline(
                context=context,
                dlt_resource=dlt_resource,
                source=snapshot_source,
                plugin_id=PLUGIN_ID,
                binding_id=binding_id,
                job_name=SNAPSHOT_JOB,
            )

        automation_sensor = dg.AutomationConditionSensorDefinition(
            name="chrome_local_automation_sensor",
            target=dg.AssetSelection.assets(chrome_local_snapshot_assets),
            default_status=dg.DefaultSensorStatus.RUNNING,
            minimum_interval_seconds=30,
        )

        return dg.Definitions(
            assets=[chrome_local_snapshot_assets],
            sensors=[automation_sensor],
            resources={"dlt_resource": DLT_RESOURCE},
        )
File without changes
@@ -0,0 +1 @@
1
+ type: plugin_chrome_local.component.ChromeLocalSyncComponent
File without changes
@@ -0,0 +1,7 @@
1
+ from __future__ import annotations
2
+
3
+ from sqlalchemy.orm import DeclarativeBase
4
+
5
+
6
class Base(DeclarativeBase):
    # Declarative base for this plugin's SQLAlchemy ORM models; it adds
    # nothing of its own beyond what DeclarativeBase provides.
    pass
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ from pydantic import AwareDatetime
4
+ from shared_plugins.models import CtxModel, IdStr, NonNegativeInt
5
+
6
+
7
class VisitRow(CtxModel):
    # One browser-history visit, as emitted for the "visit" model.

    # Name of the profile the visit was read from.
    profile: IdStr
    # Identifier of the visit within that profile's history.
    visit_id: NonNegativeInt
    # URL and page title of the visited page, when recorded.
    url: str | None = None
    title: str | None = None
    # Timezone-aware time of the visit.
    visit_time: AwareDatetime | None = None
    # Visit duration (milliseconds, going by the field name).
    visit_duration_ms: NonNegativeInt | None = None
    # Counters carried over from the visited URL's record.
    url_visit_count: NonNegativeInt | None = None
    url_typed_count: NonNegativeInt | None = None
    # Raw transition value as stored by the browser.
    transition_type: int | None = None
17
+
18
+
19
class BookmarkRow(CtxModel):
    # One bookmark, as emitted for the "bookmark" model.

    # Name of the profile the bookmark was read from.
    profile: IdStr
    # Bookmark node id and optional GUID.
    id: IdStr
    guid: str | None = None
    # Display name of the bookmark, when present.
    name: str | None = None
    # Bookmarked URL (required).
    url: IdStr
    # Slash-separated folder path of the bookmark.
    folder_path: IdStr
    # Timezone-aware creation and last-use times, when known.
    date_added: AwareDatetime | None = None
    date_last_used: AwareDatetime | None = None
@@ -0,0 +1,56 @@
1
+ from __future__ import annotations
2
+
3
+ from datetime import datetime
4
+ from typing import Literal
5
+
6
+ from pydantic import Field, StrictStr
7
+ from shared_plugins.models import IdStr, IngressModel
8
+ from shared_plugins.sqlalchemy_types import ChromiumTimestamp
9
+ from sqlalchemy import ForeignKey, String
10
+ from sqlalchemy.orm import Mapped, mapped_column, relationship
11
+
12
+ from .base import Base
13
+ from .types import RawTimestamp
14
+
15
+
16
class ChromeProfileIngress(IngressModel):
    # A discovered browser profile directory.

    # Profile name.
    name: IdStr
    # Path of the profile directory, stored as a string.
    dir_path: IdStr
19
+
20
+
21
class ChromeUrl(Base):
    """Row from urls table in Chrome History database."""

    __tablename__ = "urls"

    # Primary key; referenced by the foreign key on ChromeVisit.url_id.
    id: Mapped[int] = mapped_column(primary_key=True)
    # URL text and page title; both nullable.
    url: Mapped[str | None] = mapped_column(String)
    title: Mapped[str | None] = mapped_column(String)
    # Per-URL counters; both nullable.
    visit_count: Mapped[int | None]
    typed_count: Mapped[int | None]
31
+
32
+
33
class ChromeVisit(Base):
    """Row from visits table in Chrome History database."""

    __tablename__ = "visits"

    id: Mapped[int] = mapped_column(primary_key=True)
    # The database column is named "url"; it holds the id of the urls row.
    url_id: Mapped[int] = mapped_column("url", ForeignKey("urls.id"))
    # Stored via the shared ChromiumTimestamp column type and exposed as a datetime.
    visit_time: Mapped[datetime | None] = mapped_column(ChromiumTimestamp())
    # Raw duration and transition values as stored in the table.
    visit_duration: Mapped[int | None]
    transition: Mapped[int | None]

    # The urls row this visit points at, resolved through the url_id foreign key.
    url_entry: Mapped[ChromeUrl] = relationship()
45
+
46
+
47
class ChromeBookmarkIngress(IngressModel):
    # A bookmark node validated from the Bookmarks JSON, plus the profile
    # and folder path that the caller adds before validation.

    # Only nodes whose type is exactly "url" validate.
    type: Literal["url"]
    profile: IdStr
    id: IdStr
    guid: StrictStr | None = None
    name: StrictStr | None = None
    # Must be a non-empty string.
    url: StrictStr = Field(min_length=1)
    folder_path: IdStr
    # Raw timestamps (int or str) exactly as found in the node; None when absent.
    date_added: RawTimestamp = None
    date_last_used: RawTimestamp = None
@@ -0,0 +1,68 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Iterable, Iterator
4
+ from datetime import datetime, timedelta
5
+
6
+ from shared_plugins.sqlalchemy_types import CHROMIUM_EPOCH
7
+
8
+ from .ctx import BookmarkRow, VisitRow
9
+ from .ingress import ChromeBookmarkIngress, ChromeVisit
10
+
11
+
12
+ def _chromium_to_datetime(
13
+ *,
14
+ value: int | str | None,
15
+ ) -> datetime | None:
16
+ if value is None:
17
+ return None
18
+
19
+ return CHROMIUM_EPOCH + timedelta(microseconds=int(value))
20
+
21
+
22
def visits_to_ctx_models(
    *,
    binding_id: str,
    profile: str,
    visits: Iterable[ChromeVisit],
) -> Iterator[VisitRow]:
    """Translate ORM visit rows into ``VisitRow`` context models.

    Args:
        binding_id: Binding the rows belong to; stored as ``ctx_binding_id``.
        profile: Name of the profile the visits were read from.
        visits: Visit rows whose ``url_entry`` relationship is accessible.

    Yields:
        One ``VisitRow`` per input visit, in input order. The visit time is
        also used as the row's ``ctx_source_updated_at``.
    """
    for visit in visits:
        url_entry = visit.url_entry
        # Chromium stores visits.visit_duration in microseconds, while the
        # target field is expressed in milliseconds, so convert here.
        duration_us = visit.visit_duration
        duration_ms = None if duration_us is None else duration_us // 1000
        yield VisitRow(
            ctx_binding_id=binding_id,
            ctx_source_updated_at=visit.visit_time,
            profile=profile,
            visit_id=visit.id,
            url=url_entry.url,
            title=url_entry.title,
            visit_time=visit.visit_time,
            visit_duration_ms=duration_ms,
            url_visit_count=url_entry.visit_count,
            url_typed_count=url_entry.typed_count,
            transition_type=visit.transition,
        )
42
+
43
+
44
def bookmarks_to_ctx_models(
    *,
    binding_id: str,
    bookmarks: Iterable[ChromeBookmarkIngress],
) -> Iterator[BookmarkRow]:
    """Translate validated bookmark nodes into ``BookmarkRow`` context models.

    Each row's ``ctx_source_updated_at`` is the converted last-used time when
    there is one, otherwise the converted added time.
    """
    for entry in bookmarks:
        added_at = _chromium_to_datetime(value=entry.date_added)
        last_used_at = _chromium_to_datetime(value=entry.date_last_used)

        if last_used_at is None:
            updated_at = added_at
        else:
            updated_at = last_used_at

        yield BookmarkRow(
            ctx_binding_id=binding_id,
            ctx_source_updated_at=updated_at,
            profile=entry.profile,
            id=entry.id,
            guid=entry.guid,
            name=entry.name,
            url=entry.url,
            folder_path=entry.folder_path,
            date_added=added_at,
            date_last_used=last_used_at,
        )
@@ -0,0 +1,3 @@
1
+ from __future__ import annotations
2
+
3
# Type of a timestamp value before conversion: an int, a str, or None when absent.
RawTimestamp = int | str | None
@@ -0,0 +1,7 @@
1
+ {
2
+ "auth": {
3
+ "type": "none"
4
+ },
5
+ "mode": "dagster",
6
+ "plugin_id": "chrome_local"
7
+ }
File without changes
@@ -0,0 +1,338 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ from collections.abc import Iterator, Mapping, Sequence
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ import dlt
11
+ from pydantic import ValidationError
12
+ from shared_plugins.bindings import ResolvedBindingModels, iter_active_model_rows
13
+ from shared_plugins.models import format_validation_error
14
+ from shared_plugins.naming import (
15
+ dlt_resource_name,
16
+ dlt_source_name,
17
+ plugin_id_from_module,
18
+ )
19
+ from shared_plugins.resources import ctx_dlt_resource
20
+ from shared_plugins.sqlite import sqlite_snapshot
21
+ from sqlalchemy import create_engine, select
22
+ from sqlalchemy.orm import Session, joinedload
23
+
24
+ from ..binding_config import ChromeLocalBindingConfig, ChromeLocalDataDir
25
+ from ..models.ctx import BookmarkRow, VisitRow
26
+ from ..models.ingress import (
27
+ ChromeBookmarkIngress,
28
+ ChromeProfileIngress,
29
+ ChromeVisit,
30
+ )
31
+ from ..models.translators import bookmarks_to_ctx_models, visits_to_ctx_models
32
+
33
# Plugin identifier, computed by plugin_id_from_module from this module's __file__.
PLUGIN_ID = plugin_id_from_module(__file__)
# Job name; combined with PLUGIN_ID to name the dlt source defined in this module.
JOB = "snapshot"
LOGGER = logging.getLogger(__name__)
# Model names this source supports; one dlt resource is defined for each.
SUPPORTED_MODEL_NAMES = ("visit", "bookmark")
# Keys under "roots" in the Bookmarks JSON that are walked for bookmarks.
ROOT_FOLDERS = ("bookmark_bar", "other", "synced")
# Write disposition passed to both resources: merge using delete-insert.
MERGE_WRITE_DISPOSITION = {"disposition": "merge", "strategy": "delete-insert"}
# Merge key passed to both resources.
MERGE_KEY = ("_ctx_binding_id",)
40
+
41
+
42
@dataclass(frozen=True)
class BookmarkSkip:
    """Record of a bookmark node that was skipped instead of being emitted."""

    # Profile the node was found in.
    profile: str
    # Raw values copied from the node; typed as object because the node
    # failed validation and they may be of any JSON type (or missing).
    id: object
    guid: object
    name: object
    url: object
    # Folder path the node was found under.
    folder_path: str
    # Short machine-readable reason for the skip.
    reason: str
    # Formatted validation error text.
    validation_error: str
52
+
53
+
54
def discover_profiles(
    data_dirs: Sequence[ChromeLocalDataDir],
) -> list[ChromeProfileIngress]:
    """Find profile directories inside the configured Chromium data directories.

    A subdirectory counts as a profile when it contains a ``History`` or a
    ``Bookmarks`` file. Data directories that do not exist are ignored, and
    subdirectories are visited in sorted order. Profiles are named
    ``"<data dir name>/<subdirectory name>"``.

    Raises:
        RuntimeError: If no profile is found in any data directory.
    """
    found: list[ChromeProfileIngress] = []
    for data_dir in data_dirs:
        root = data_dir.path
        if not root.is_dir():
            continue

        for candidate in sorted(root.iterdir()):
            if not candidate.is_dir():
                continue
            looks_like_profile = (candidate / "History").is_file() or (
                candidate / "Bookmarks"
            ).is_file()
            if not looks_like_profile:
                continue
            found.append(
                ChromeProfileIngress.model_validate(
                    {
                        "name": f"{data_dir.name}/{candidate.name}",
                        "dir_path": str(candidate),
                    }
                )
            )

    if found:
        return found

    searched = ", ".join(str(data_dir.path) for data_dir in data_dirs)
    raise RuntimeError(
        f"No Chrome profiles found. "
        f"Searched for subdirectories containing History or Bookmarks in: {searched}"
    )
87
+
88
+
89
def _iter_profile_visits(profile: ChromeProfileIngress) -> Iterator[ChromeVisit]:
    """Yield every visit row from one profile's History database.

    Yields nothing when the profile has no ``History`` file. The database is
    read through ``sqlite_snapshot`` rather than opened in place, and each
    visit is loaded together with its URL row.
    """
    history_db = Path(profile.dir_path) / "History"
    if not history_db.is_file():
        return

    with sqlite_snapshot(history_db) as snapshot_path:
        engine = create_engine(f"sqlite:///{snapshot_path}")
        try:
            with Session(engine) as session:
                statement = select(ChromeVisit).options(
                    joinedload(ChromeVisit.url_entry)
                )
                for visit in session.scalars(statement).unique():
                    yield visit
        finally:
            # Always release the engine's connections, even if the consumer
            # stops iterating early or an error occurs.
            engine.dispose()
103
+
104
+
105
def append_folder(folder_path: str, child_folder_name: str | None) -> str:
    """Return ``folder_path`` extended by ``/child_folder_name``.

    When the child name is ``None`` or empty, ``folder_path`` is returned
    unchanged.
    """
    if child_folder_name:
        return f"{folder_path}/{child_folder_name}"
    return folder_path
110
+
111
+
112
def _to_bookmark_record_candidate(
    *,
    profile_name: str,
    node: Mapping[str, object],
    folder_path: str,
) -> ChromeBookmarkIngress | BookmarkSkip:
    """Validate one bookmark node, or describe why it must be skipped.

    Args:
        profile_name: Name of the profile the node was found in.
        node: Raw bookmark node from the Bookmarks JSON.
        folder_path: Folder path the node was found under.

    Returns:
        The validated ``ChromeBookmarkIngress``, or a ``BookmarkSkip`` with
        reason ``"invalid_url"`` when validation failed on the ``url`` field.

    Raises:
        ValidationError: If validation fails and no error concerns ``url``.
    """
    candidate = {**node, "profile": profile_name, "folder_path": folder_path}

    try:
        return ChromeBookmarkIngress.model_validate(candidate)
    except ValidationError as exc:
        # Decide from the structured error locations, not from the rendered
        # message: a substring search for "url" also matches unrelated errors
        # whose text merely mentions it (e.g. the literal 'url' expected by
        # the `type` field, or an input value echoed into the message).
        url_is_invalid = any(
            error["loc"] and error["loc"][0] == "url" for error in exc.errors()
        )
        if not url_is_invalid:
            raise

        return BookmarkSkip(
            profile=profile_name,
            id=node.get("id"),
            guid=node.get("guid"),
            name=node.get("name"),
            url=node.get("url"),
            folder_path=folder_path,
            reason="invalid_url",
            validation_error=format_validation_error(exc),
        )
138
+
139
+
140
def _log_bookmark_skip(skip: BookmarkSkip) -> None:
    """Log a skipped bookmark as a warning with a JSON payload.

    Keys are sorted, and values that are not JSON-serializable are rendered
    with ``str``.
    """
    payload = {
        "profile": skip.profile,
        "id": skip.id,
        "guid": skip.guid,
        "name": skip.name,
        "url": skip.url,
        "folder_path": skip.folder_path,
        "reason": skip.reason,
        "validation_error": skip.validation_error,
    }
    serialized = json.dumps(payload, sort_keys=True, default=str)
    LOGGER.warning("chrome_local.bookmark_skipped %s", serialized)
158
+
159
+
160
def _walk_bookmark_node(
    *,
    profile_name: str,
    node_value: object,
    folder_path: str,
    skipped_bookmarks: list[BookmarkSkip],
) -> Iterator[ChromeBookmarkIngress]:
    """Yield every valid bookmark at or below ``node_value``, depth first.

    Nodes of type "url" are validated; invalid ones are logged and appended
    to ``skipped_bookmarks`` instead of being yielded. Nodes of type "folder"
    are descended into, extending ``folder_path`` with the name of each child
    folder. Anything else (non-mapping values, other node types, a folder
    whose "children" is not a list) is ignored.
    """
    if not isinstance(node_value, Mapping):
        return

    kind = node_value.get("type")

    if kind == "url":
        outcome = _to_bookmark_record_candidate(
            profile_name=profile_name,
            node=node_value,
            folder_path=folder_path,
        )
        if isinstance(outcome, BookmarkSkip):
            _log_bookmark_skip(outcome)
            skipped_bookmarks.append(outcome)
        else:
            yield outcome
        return

    if kind != "folder":
        return

    children = node_value.get("children")
    if not isinstance(children, list):
        return

    for child in children:
        if not isinstance(child, Mapping):
            continue

        # Only child folders extend the path; a url child stays in this
        # folder, and the recursive call takes care of validating it.
        child_path = folder_path
        if child.get("type") == "folder":
            child_name = child.get("name")
            child_path = append_folder(
                folder_path,
                child_name if isinstance(child_name, str) else None,
            )

        yield from _walk_bookmark_node(
            profile_name=profile_name,
            node_value=child,
            folder_path=child_path,
            skipped_bookmarks=skipped_bookmarks,
        )
221
+
222
+
223
def _load_profile_bookmarks(
    profile: ChromeProfileIngress,
) -> tuple[list[ChromeBookmarkIngress], list[BookmarkSkip]]:
    """Read one profile's Bookmarks file.

    Returns:
        ``(bookmarks, skipped)`` gathered from the root folders listed in
        ``ROOT_FOLDERS``; two empty lists when the profile has no Bookmarks
        file.

    Raises:
        RuntimeError: If the JSON root is not an object or has no ``roots``
            object.
    """
    source_file = Path(profile.dir_path) / "Bookmarks"
    if not source_file.is_file():
        return [], []

    document = json.loads(source_file.read_text(encoding="utf-8"))
    if not isinstance(document, Mapping):
        raise RuntimeError("Chrome Bookmarks root must be a JSON object.")

    roots = document.get("roots")
    if not isinstance(roots, Mapping):
        raise RuntimeError(
            "Chrome Bookmarks JSON missing 'roots'; cannot extract bookmarks."
        )

    collected: list[ChromeBookmarkIngress] = []
    skipped: list[BookmarkSkip] = []
    for root_name in ROOT_FOLDERS:
        root_node = roots.get(root_name)
        if root_node is not None:
            collected.extend(
                _walk_bookmark_node(
                    profile_name=profile.name,
                    node_value=root_node,
                    folder_path=root_name,
                    skipped_bookmarks=skipped,
                )
            )

    return collected, skipped
260
+
261
+
262
def _scan_chrome_bookmarks(
    data_dirs: Sequence[ChromeLocalDataDir],
) -> list[ChromeBookmarkIngress]:
    """Collect the bookmarks of every profile found in ``data_dirs``.

    Logs a summary with the number of profiles, bookmarks and skipped
    bookmarks, and returns only the valid bookmarks.
    """
    profiles = discover_profiles(data_dirs)

    all_bookmarks: list[ChromeBookmarkIngress] = []
    skipped_total = 0
    for profile in profiles:
        found, skipped = _load_profile_bookmarks(profile)
        all_bookmarks += found
        skipped_total += len(skipped)

    LOGGER.info(
        "chrome_local.bookmarks_discovered profiles=%d bookmarks=%d skipped_bookmarks=%d",
        len(profiles),
        len(all_bookmarks),
        skipped_total,
    )
    return all_bookmarks
282
+
283
+
284
@dlt.source(name=dlt_source_name(PLUGIN_ID, JOB))
def chrome_local_snapshot_source(
    binding_id: str,
    cfg: ChromeLocalBindingConfig,
    *,
    binding_models: ResolvedBindingModels,
) -> tuple[Any, ...]:
    """Build the dlt resources for one binding's Chrome snapshot.

    Returns the "visit" and "bookmark" resources. Each one yields nothing
    when its model is not in ``binding_models.active``.
    """
    data_dirs = cfg.data_dirs

    @ctx_dlt_resource(
        name=dlt_resource_name("visit"),
        write_disposition=MERGE_WRITE_DISPOSITION,
        merge_key=MERGE_KEY,
        primary_key=("_ctx_binding_id", "profile", "visit_id"),
    )
    def visit_resource() -> Iterator[VisitRow]:
        if "visit" not in binding_models.active:
            return

        for profile in discover_profiles(data_dirs):
            visit_rows = visits_to_ctx_models(
                binding_id=binding_id,
                profile=profile.name,
                visits=_iter_profile_visits(profile),
            )
            yield from iter_active_model_rows(
                model_name="visit",
                rows=visit_rows,
                binding_models=binding_models,
            )

    @ctx_dlt_resource(
        name=dlt_resource_name("bookmark"),
        write_disposition=MERGE_WRITE_DISPOSITION,
        merge_key=MERGE_KEY,
        primary_key=("_ctx_binding_id", "profile", "id"),
    )
    def bookmark_resource() -> Iterator[BookmarkRow]:
        if "bookmark" not in binding_models.active:
            return

        bookmark_rows = bookmarks_to_ctx_models(
            binding_id=binding_id,
            bookmarks=_scan_chrome_bookmarks(data_dirs),
        )
        yield from iter_active_model_rows(
            model_name="bookmark",
            rows=bookmark_rows,
            binding_models=binding_models,
        )

    return (visit_resource, bookmark_resource)