contextbase-plugin-imessage-local 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- contextbase_plugin_imessage_local-0.2.9.dist-info/METADATA +14 -0
- contextbase_plugin_imessage_local-0.2.9.dist-info/RECORD +17 -0
- contextbase_plugin_imessage_local-0.2.9.dist-info/WHEEL +4 -0
- plugin_imessage_local/__init__.py +0 -0
- plugin_imessage_local/attributed_body.py +269 -0
- plugin_imessage_local/binding_config.py +13 -0
- plugin_imessage_local/component.py +125 -0
- plugin_imessage_local/defs/__init__.py +0 -0
- plugin_imessage_local/defs/defs.yaml +1 -0
- plugin_imessage_local/models/__init__.py +0 -0
- plugin_imessage_local/models/base.py +7 -0
- plugin_imessage_local/models/ctx.py +193 -0
- plugin_imessage_local/models/ingress.py +321 -0
- plugin_imessage_local/models/translators.py +384 -0
- plugin_imessage_local/plugin.json +7 -0
- plugin_imessage_local/sources/__init__.py +0 -0
- plugin_imessage_local/sources/snapshot.py +234 -0
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: contextbase-plugin-imessage-local
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: iMessage local plugin for ContextBase
|
|
5
|
+
Author: Alizain Feerasta
|
|
6
|
+
Author-email: Alizain Feerasta <alizain.feerasta@gmail.com>
|
|
7
|
+
Requires-Dist: contextbase-shared-plugins==0.2.9
|
|
8
|
+
Requires-Dist: dagster==1.12.14
|
|
9
|
+
Requires-Dist: dagster-dlt==0.28.14
|
|
10
|
+
Requires-Dist: dlt>=1.26.0
|
|
11
|
+
Requires-Dist: pydantic>=2.12.0
|
|
12
|
+
Requires-Dist: sqlalchemy>=2.0.0
|
|
13
|
+
Requires-Dist: pytypedstream>=0.1.0
|
|
14
|
+
Requires-Python: >=3.14, <3.15
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
plugin_imessage_local/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
plugin_imessage_local/attributed_body.py,sha256=dqDGhacVNQBiItvJ4s-OESK6eI-P6oS6iHFIFly09XM,8690
|
|
3
|
+
plugin_imessage_local/binding_config.py,sha256=27KQdL1jBL3EaNvETYbdW_mTAjBBNCapEVrdC8bgwME,346
|
|
4
|
+
plugin_imessage_local/component.py,sha256=zM6l0XLl7Eb67HVzGk8XvvSthSbwt2A_B5pmBhMB_o0,4342
|
|
5
|
+
plugin_imessage_local/defs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
|
+
plugin_imessage_local/defs/defs.yaml,sha256=3VPEop8FV-z3MzhwZ1At1E6LJ8nY0XZXmf_dgi3E74Y,65
|
|
7
|
+
plugin_imessage_local/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
+
plugin_imessage_local/models/base.py,sha256=Fp7n4EfxQOxzqm34-D-rYyu60P88eZUIewCBNP_Iuc4,119
|
|
9
|
+
plugin_imessage_local/models/ctx.py,sha256=ssFsqd13O7WFdmWp46sgaJmwIW3GIKC6dr7w0lOOams,6578
|
|
10
|
+
plugin_imessage_local/models/ingress.py,sha256=k9RhevINbEB22-tc_nerw-nWM49q1BNpiKa7ojzun6s,15077
|
|
11
|
+
plugin_imessage_local/models/translators.py,sha256=gpf8qZAdFX-5tP7xW8eCDLGn5xD05TJ2c38Ti7M03x0,14776
|
|
12
|
+
plugin_imessage_local/plugin.json,sha256=Qb9qSVo9PHFxPb1KHKCgWqEL82ukL9wWlZZb37p0j8U,87
|
|
13
|
+
plugin_imessage_local/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
plugin_imessage_local/sources/snapshot.py,sha256=dZFApIcC6Xrc-B0KQOu5WZSKdC4oPSak5d6C14Xx_os,8203
|
|
15
|
+
contextbase_plugin_imessage_local-0.2.9.dist-info/WHEEL,sha256=i9aSRDivn5iP9LaR1BLQX2GNAuriQWPsFwbbWygTX2k,81
|
|
16
|
+
contextbase_plugin_imessage_local-0.2.9.dist-info/METADATA,sha256=1SqkU6WzhTtb690xO_gQ-9kn9l1Q6NqcygxSuVQXshs,485
|
|
17
|
+
contextbase_plugin_imessage_local-0.2.9.dist-info/RECORD,,
|
|
File without changes
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Decode iMessage ``message.attributedBody`` blobs into text + attribute runs.
|
|
2
|
+
|
|
3
|
+
The ``attributedBody`` column stores a NeXTSTEP-typedstream-archived
|
|
4
|
+
``NSAttributedString``. On modern macOS ~97% of messages populate this column
|
|
5
|
+
rather than the plain ``text`` column, so extracting it is essential.
|
|
6
|
+
|
|
7
|
+
Two tiers:
|
|
8
|
+
|
|
9
|
+
1. **Primary** — ``pytypedstream`` reads the root ``NSMutableAttributedString``
|
|
10
|
+
object, then we walk its attribute-run structure to recover text plus the
|
|
11
|
+
per-run attribute dictionaries.
|
|
12
|
+
2. **Fallback** — for blobs that raise ``InvalidTypedStreamError`` we attempt a
|
|
13
|
+
length-aware byte scan modeled on ``niftycode/imessage_reader`` which
|
|
14
|
+
recovers just the plain text.
|
|
15
|
+
|
|
16
|
+
Returns ``None`` only when both tiers fail. The raw blob is still preserved
|
|
17
|
+
upstream in the ingress model, so any future decoder can revisit it without
|
|
18
|
+
re-ingesting.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from collections.abc import Mapping
|
|
25
|
+
from dataclasses import dataclass, field
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
from typedstream import (
|
|
29
|
+
GenericArchivedObject,
|
|
30
|
+
InvalidTypedStreamError,
|
|
31
|
+
TypedGroup,
|
|
32
|
+
TypedValue,
|
|
33
|
+
unarchive_from_data,
|
|
34
|
+
)
|
|
35
|
+
from typedstream.types.foundation import (
|
|
36
|
+
NSArray,
|
|
37
|
+
NSData,
|
|
38
|
+
NSDate,
|
|
39
|
+
NSDictionary,
|
|
40
|
+
NSMutableArray,
|
|
41
|
+
NSMutableData,
|
|
42
|
+
NSMutableDictionary,
|
|
43
|
+
NSMutableSet,
|
|
44
|
+
NSMutableString,
|
|
45
|
+
NSNumber,
|
|
46
|
+
NSSet,
|
|
47
|
+
NSString,
|
|
48
|
+
NSURL,
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
LOGGER = logging.getLogger(__name__)
|
|
52
|
+
|
|
53
|
+
_BYTESCAN_SENTINEL = b"NSString"
|
|
54
|
+
_BYTESCAN_PREAMBLE = b"\x01\x94\x84\x01\x2b" # '\x01' '\x94' '\x84' '\x01' '+'
|
|
55
|
+
_BYTESCAN_LONG_LENGTH_MARKER = 0x81
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass(frozen=True)
class TextRun:
    """One span of decoded text together with the attributes applied to it.

    Offsets are expressed in UTF-16 code units (Apple's native string
    indexing), not Python code points. To slice the decoded text with a run,
    encode the text to UTF-16 first and index into that.
    """

    # Offset of the first UTF-16 code unit covered by this run.
    location: int
    # Number of UTF-16 code units covered by this run.
    length: int
    # Attribute dictionary attached to the run (shared between runs that
    # reference the same stored dictionary).
    attributes: Mapping[str, Any]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass(frozen=True)
class DecodedBody:
    """Decoded form of an ``attributedBody`` blob: plain text plus its runs."""

    # The recovered message text.
    text: str
    # Attribute runs in document order; empty when only the text was recovered.
    runs: tuple[TextRun, ...] = ()
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def decode_attributed_body(blob: bytes) -> DecodedBody | None:
    """Turn a raw ``attributedBody`` blob into a :class:`DecodedBody`.

    The typedstream decoder is tried first. If it raises
    ``InvalidTypedStreamError`` the failure is logged at debug level and the
    byte-scan fallback is used instead.

    Returns ``None`` for an empty/falsy blob, or when the byte-scan fallback
    cannot recover any text either.
    """
    # Nothing to decode: skip both tiers.
    if not blob:
        return None

    try:
        return _decode_via_typedstream(blob)
    except InvalidTypedStreamError as failure:
        LOGGER.debug(
            "attributed_body.typedstream_failed falling back to byte-scan: %s",
            failure,
        )

    # Only reached when the primary decoder rejected the blob.
    return _decode_via_bytescan(blob)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _decode_via_typedstream(blob: bytes) -> DecodedBody:
    """Decode ``blob`` with ``pytypedstream`` and rebuild text plus runs.

    The unarchived root must be a ``GenericArchivedObject`` whose class name
    contains ``"AttributedString"``. Its first content item carries the text;
    the remaining items are replayed as attribute runs.

    Raises:
        InvalidTypedStreamError: if the root object, its class name, or its
            contents do not have the expected shape.
    """
    root = unarchive_from_data(blob)
    if not isinstance(root, GenericArchivedObject):
        raise InvalidTypedStreamError(
            f"attributedBody root is {type(root).__name__}, expected "
            "GenericArchivedObject"
        )

    # Class names are bytes in the archive; decode leniently for the check
    # and for the error message.
    root_class = root.clazz.name.decode("ascii", errors="replace")
    if "AttributedString" not in root_class:
        raise InvalidTypedStreamError(
            f"attributedBody root class is {root_class!r}, expected an "
            "NSAttributedString subclass"
        )

    items = root.contents
    if not items:
        raise InvalidTypedStreamError("attributedBody has no encoded contents")

    first = items[0]
    if not isinstance(first, TypedValue):
        raise InvalidTypedStreamError(
            f"attributedBody[0] is {type(first).__name__}, expected TypedValue"
        )

    # Text first, then runs: keyword arguments are evaluated left to right.
    return DecodedBody(
        text=_as_python_string(first.value),
        runs=tuple(_walk_attribute_runs(items[1:])),
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _walk_attribute_runs(tail: list[Any]) -> list[TextRun]:
    """Rebuild the attribute runs that follow the text in the archive.

    ``tail`` is a flat sequence of ``TypedGroup`` entries holding an
    ``(index, length)`` pair, each optionally followed by a ``TypedValue``
    wrapping the attribute dictionary. Indices are 1-based: an index one past
    the number of dictionaries seen so far announces that a new dictionary
    follows immediately; any smaller positive index reuses a dictionary seen
    earlier.

    Run locations are the running sum of the preceding run lengths.

    Raises:
        InvalidTypedStreamError: on any entry with an unexpected type or
            shape, a truncated sequence, or an out-of-range index.
    """
    collected: list[TextRun] = []
    seen_dicts: list[Mapping[str, Any]] = []
    offset = 0

    # A shared iterator lets the loop body consume the dictionary entry that
    # follows a "new dictionary" marker.
    exhausted = object()
    entries = iter(tail)

    for entry in entries:
        if not isinstance(entry, TypedGroup):
            raise InvalidTypedStreamError(
                f"attributedBody run entry is {type(entry).__name__}, "
                "expected TypedGroup"
            )

        try:
            raw_index, raw_length = entry.values
        except (TypeError, ValueError) as exc:
            raise InvalidTypedStreamError(
                f"attributedBody run group has unexpected shape: {entry!r}"
            ) from exc

        index = int(raw_index)
        length = int(raw_length)
        known = len(seen_dicts)

        if index == known + 1:
            # New dictionary: it must be the very next entry.
            follower = next(entries, exhausted)
            if follower is exhausted:
                raise InvalidTypedStreamError(
                    "attributedBody ended mid-run; expected NSDictionary after "
                    f"new-dict marker index={index}"
                )
            if not isinstance(follower, TypedValue):
                raise InvalidTypedStreamError(
                    f"attributedBody expected TypedValue(NSDictionary), got "
                    f"{type(follower).__name__}"
                )
            attrs = _as_python(follower.value)
            if not isinstance(attrs, Mapping):
                raise InvalidTypedStreamError(
                    f"attributedBody attribute dict is {type(attrs).__name__}, "
                    "expected Mapping"
                )
            seen_dicts.append(attrs)
        elif 1 <= index <= known:
            # Back-reference to a dictionary stored earlier.
            attrs = seen_dicts[index - 1]
        else:
            raise InvalidTypedStreamError(
                f"attributedBody run references attribute index {index} with "
                f"only {known} stored"
            )

        collected.append(TextRun(location=offset, length=length, attributes=attrs))
        offset += length

    return collected
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _decode_via_bytescan(blob: bytes) -> DecodedBody | None:
    """Length-aware byte-scan fallback; recovers text only, no attributes.

    Locates the first ``NSString`` class marker, checks that the expected
    preamble follows it, reads the length prefix, and decodes that many bytes
    as UTF-8.

    The length prefix is one of:

    * a single byte holding the length directly,
    * ``0x81`` followed by a 2-byte little-endian length,
    * ``0x82`` followed by a 4-byte little-endian length (text longer than
      65535 bytes). Previously this tag was misread as a literal length of
      130, which produced truncated, garbage-prefixed text.

    Returns ``None`` when the marker or preamble is missing, the blob is
    truncated, or the payload is not valid UTF-8.
    """
    # typedstream tag announcing a 4-byte integer; companion to the 2-byte
    # tag held in ``_BYTESCAN_LONG_LENGTH_MARKER``.
    int32_length_marker = 0x82

    anchor = blob.find(_BYTESCAN_SENTINEL)
    if anchor < 0:
        return None

    cursor = anchor + len(_BYTESCAN_SENTINEL)
    if blob[cursor : cursor + len(_BYTESCAN_PREAMBLE)] != _BYTESCAN_PREAMBLE:
        return None
    cursor += len(_BYTESCAN_PREAMBLE)

    if cursor >= len(blob):
        return None

    marker = blob[cursor]
    cursor += 1

    # Width in bytes of the explicit length field; 0 means the marker byte
    # itself is the length.
    if marker == _BYTESCAN_LONG_LENGTH_MARKER:
        width = 2
    elif marker == int32_length_marker:
        width = 4
    else:
        width = 0

    if width:
        if cursor + width > len(blob):
            return None
        length = int.from_bytes(blob[cursor : cursor + width], "little")
        cursor += width
    else:
        length = marker

    text_bytes = blob[cursor : cursor + length]
    if len(text_bytes) != length:
        # Declared length runs past the end of the blob.
        return None

    try:
        text = text_bytes.decode("utf-8")
    except UnicodeDecodeError:
        return None

    return DecodedBody(text=text, runs=())
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def _as_python_string(value: Any) -> str:
    """Return ``value`` as a native ``str``.

    Accepts ``NSString`` / ``NSMutableString`` wrappers (unwrapped through
    their ``.value``) and plain ``str`` instances.

    Raises:
        InvalidTypedStreamError: for any other type.
    """
    if isinstance(value, (NSString, NSMutableString)):
        unwrapped = value.value
        return str(unwrapped)
    if not isinstance(value, str):
        raise InvalidTypedStreamError(
            f"expected NSString-compatible value, got {type(value).__name__}"
        )
    return value
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _as_python(value: Any) -> Any:
    """Recursively map pytypedstream Foundation wrappers to native values.

    * ``None`` and ``str``/``int``/``float``/``bool``/``bytes`` pass through.
    * String wrappers become ``str``; data wrappers become ``bytes``.
    * ``NSNumber``, ``NSURL`` and ``NSDate`` are replaced by their ``.value``.
    * Dictionaries become ``dict`` with converted keys and values.
    * Arrays and sets become ``list`` (empty when no elements are exposed).

    Raises:
        InvalidTypedStreamError: for any type not listed above, so the caller
            can fall back to the byte-scan tier instead of emitting an opaque
            placeholder.
    """
    native_scalars = (str, int, float, bool, bytes)
    if value is None or isinstance(value, native_scalars):
        return value

    if isinstance(value, (NSString, NSMutableString)):
        return str(value.value)

    if isinstance(value, (NSMutableData, NSData)):
        return bytes(value.data)

    # These wrappers all expose their payload the same way.
    if isinstance(value, (NSNumber, NSURL, NSDate)):
        return value.value

    if isinstance(value, (NSDictionary, NSMutableDictionary)):
        converted = {}
        for key, item in value.contents.items():
            native_key = _as_python(key)
            converted[native_key] = _as_python(item)
        return converted

    if isinstance(value, (NSArray, NSMutableArray, NSSet, NSMutableSet)):
        # The collection may expose its members under either attribute name.
        members = getattr(value, "contents", None) or getattr(value, "elements", None)
        if members is None:
            return []
        return [_as_python(member) for member in members]

    raise InvalidTypedStreamError(
        f"unhandled Foundation type in attribute value: "
        f"{type(value).__name__} (repr={value!r})"
    )
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
from pydantic import Field
|
|
6
|
+
|
|
7
|
+
from shared_plugins.bindings import BaseBindingConfigModel, ResolvedPath
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class IMessageLocalBindingConfig(BaseBindingConfigModel):
    # Binding configuration for the iMessage-local plugin.

    # Location of the Messages database. When not supplied, defaults to
    # ``~/Library/Messages/chat.db`` for the user running the process; the
    # default is computed lazily, at model construction time.
    chat_db: ResolvedPath = Field(
        default_factory=lambda: Path.home().joinpath("Library", "Messages", "chat.db"),
    )
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
import dagster as dg
|
|
2
|
+
from dagster import AssetExecutionContext
|
|
3
|
+
from dagster_dlt import DagsterDltResource
|
|
4
|
+
from shared_plugins.automation import non_overlapping_automation_condition
|
|
5
|
+
from shared_plugins.bindings import parse_binding_config
|
|
6
|
+
from shared_plugins.control_plane import ControlPlaneClient
|
|
7
|
+
from shared_plugins.dlt import resolve_partition_binding, run_dlt_pipeline
|
|
8
|
+
from shared_plugins.naming import (
|
|
9
|
+
dagster_asset_group_name,
|
|
10
|
+
dagster_asset_tags,
|
|
11
|
+
dagster_dlt_asset_key,
|
|
12
|
+
dagster_partition_def_name,
|
|
13
|
+
dagster_pool_name,
|
|
14
|
+
dlt_source_name,
|
|
15
|
+
plugin_id_from_module,
|
|
16
|
+
)
|
|
17
|
+
from shared_plugins.resources import DLT_RESOURCE
|
|
18
|
+
|
|
19
|
+
from .binding_config import IMessageLocalBindingConfig
|
|
20
|
+
from .sources.snapshot import imessage_local_snapshot_source
|
|
21
|
+
|
|
22
|
+
PLUGIN_ID = plugin_id_from_module(__file__)
|
|
23
|
+
SNAPSHOT_JOB = "snapshot"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _build_snapshot_specs(
    partitions_def: dg.PartitionsDefinition,
    automation_condition: dg.AutomationCondition,
) -> list[dg.AssetSpec]:
    """Build one ``AssetSpec`` per table emitted by the snapshot dlt source.

    Every spec shares the plugin's asset group, tags, the supplied automation
    condition and the supplied partitions definition; only the asset key
    differs.
    """
    source_name = dlt_source_name(PLUGIN_ID, SNAPSHOT_JOB)
    common_kwargs = {
        "group_name": dagster_asset_group_name(PLUGIN_ID),
        "tags": dagster_asset_tags(PLUGIN_ID),
        "automation_condition": automation_condition,
        "partitions_def": partitions_def,
    }

    # Order is preserved in the returned list.
    table_names = (
        "handle",
        "chat",
        "attachment",
        "message",
        "chat_handle_join",
        "chat_message_join",
        "message_attachment_join",
    )
    return [
        dg.AssetSpec(key=dagster_dlt_asset_key(source_name, table), **common_kwargs)
        for table in table_names
    ]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class IMessageLocalSyncComponent(dg.Component):
    # Dagster component exposing the iMessage-local snapshot multi-asset and
    # the automation sensor that drives it.

    def build_defs(self, context: dg.ComponentLoadContext) -> dg.Definitions:
        """Assemble the snapshot multi-asset, its sensor and the dlt resource."""
        partitions = dg.DynamicPartitionsDefinition(
            name=dagster_partition_def_name(PLUGIN_ID)
        )
        # Materialize when missing, or on a 15-minute cron.
        refresh_condition = non_overlapping_automation_condition(
            dg.AutomationCondition.on_missing()
            | dg.AutomationCondition.on_cron("*/15 * * * *")
        )
        specs = _build_snapshot_specs(
            partitions_def=partitions,
            automation_condition=refresh_condition,
        )

        # Parameter names below double as resource keys; keep them unchanged.
        @dg.multi_asset(
            specs=specs,
            can_subset=True,
            name="imessage_local_snapshot",
            pool=dagster_pool_name(PLUGIN_ID),
        )
        def imessage_local_snapshot_assets(
            context: AssetExecutionContext,
            dlt_resource: DagsterDltResource,
            control_plane: dg.ResourceParam[ControlPlaneClient],
        ):
            # Resolve the binding for the current partition, then parse its
            # plugin-specific configuration.
            resolved = resolve_partition_binding(
                context=context,
                control_plane=control_plane,
                plugin_id=PLUGIN_ID,
            )
            binding_id = str(resolved.binding_id)
            binding_cfg = parse_binding_config(resolved, IMessageLocalBindingConfig)

            snapshot_source = imessage_local_snapshot_source(binding_id, binding_cfg)
            yield from run_dlt_pipeline(
                context=context,
                dlt_resource=dlt_resource,
                source=snapshot_source,
                plugin_id=PLUGIN_ID,
                binding_id=binding_id,
                job_name=SNAPSHOT_JOB,
            )

        sensor = dg.AutomationConditionSensorDefinition(
            name="imessage_local_automation_sensor",
            target=dg.AssetSelection.assets(imessage_local_snapshot_assets),
            default_status=dg.DefaultSensorStatus.RUNNING,
            minimum_interval_seconds=30,
        )

        return dg.Definitions(
            assets=[imessage_local_snapshot_assets],
            sensors=[sensor],
            resources={"dlt_resource": DLT_RESOURCE},
        )
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
type: plugin_imessage_local.component.IMessageLocalSyncComponent
|
|
File without changes
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from pydantic import AwareDatetime
|
|
6
|
+
from shared_plugins.models import CtxModel
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class HandleRow(CtxModel):
    # One handle record.
    # NOTE(review): field names presumably mirror the columns of chat.db's
    # ``handle`` table - confirm against the snapshot source.

    # Required fields.
    id: str
    service: str

    # Optional fields; absent values default to None.
    country: str | None = None
    uncanonicalized_id: str | None = None
    person_centric_id: str | None = None
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ChatRow(CtxModel):
    # One chat (conversation) record.
    # NOTE(review): field names presumably mirror the columns of chat.db's
    # ``chat`` table - confirm against the snapshot source.

    # Required field.
    guid: str

    # Optional fields; absent values default to None. Flag-like ``is_*``
    # fields are typed as ints, not bools.
    style: int | None = None
    state: int | None = None
    account_id: str | None = None
    properties: bytes | None = None
    chat_identifier: str | None = None
    service_name: str | None = None
    room_name: str | None = None
    account_login: str | None = None
    is_archived: int | None = None
    last_addressed_handle: str | None = None
    display_name: str | None = None
    group_id: str | None = None
    is_filtered: int | None = None
    successful_query: int | None = None
    engram_id: str | None = None
    server_change_token: str | None = None
    ck_sync_state: int | None = None
    original_group_id: str | None = None
    last_read_message_timestamp: AwareDatetime | None = None
    sr_server_change_token: str | None = None
    sr_ck_sync_state: int | None = None
    cloudkit_record_id: str | None = None
    sr_cloudkit_record_id: str | None = None
    last_addressed_sim_id: str | None = None
    is_blackholed: int | None = None
    syndication_date: AwareDatetime | None = None
    syndication_type: int | None = None
    is_recovered: int | None = None
    is_deleting_incoming_messages: int | None = None
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class AttachmentRow(CtxModel):
    # One attachment record.
    # NOTE(review): field names presumably mirror the columns of chat.db's
    # ``attachment`` table - confirm against the snapshot source.
    # ``guid`` and ``original_guid`` are the only required fields; every
    # other field defaults to None.

    guid: str
    created_date: AwareDatetime | None = None
    start_date: AwareDatetime | None = None
    filename: str | None = None
    uti: str | None = None
    mime_type: str | None = None
    transfer_state: int | None = None
    is_outgoing: int | None = None
    user_info: bytes | None = None
    transfer_name: str | None = None
    total_bytes: int | None = None
    is_sticker: int | None = None
    sticker_user_info: bytes | None = None
    attribution_info: bytes | None = None
    hide_attachment: int | None = None
    ck_sync_state: int | None = None
    ck_server_change_token_blob: bytes | None = None
    ck_record_id: str | None = None
    # Required, unlike its neighbours: validation fails when it is missing.
    original_guid: str
    sr_ck_sync_state: int | None = None
    sr_ck_server_change_token_blob: bytes | None = None
    sr_ck_record_id: str | None = None
    is_commsafety_sensitive: int | None = None
    emoji_image_content_identifier: str | None = None
    emoji_image_short_description: str | None = None
    preview_generation_state: int | None = None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class MessageRow(CtxModel):
    # One message record.
    # NOTE(review): most field names presumably mirror the columns of
    # chat.db's ``message`` table - confirm against the snapshot source.
    # Only ``guid`` is required; every other field defaults to None.
    # Flag-like ``is_*`` / ``was_*`` fields are typed as ints, not bools.

    guid: str
    text: str | None = None
    replace: int | None = None
    service_center: str | None = None
    handle_id: int | None = None
    # NOTE(review): the ``*_natural_*`` fields look like values denormalized
    # from the referenced handle row - verify against the translator.
    handle_natural_id: str | None = None
    handle_natural_service: str | None = None
    subject: str | None = None
    country: str | None = None
    # Raw attributed-body blob.
    attributed_body: bytes | None = None
    # NOTE(review): presumably the decoded attribute runs of
    # ``attributed_body`` - confirm against the translator.
    attribute_runs: list[dict[str, Any]] | None = None
    version: int | None = None
    type: int | None = None
    service: str | None = None
    account: str | None = None
    account_guid: str | None = None
    error: int | None = None
    date: AwareDatetime | None = None
    date_read: AwareDatetime | None = None
    date_delivered: AwareDatetime | None = None
    is_delivered: int | None = None
    is_finished: int | None = None
    is_emote: int | None = None
    is_from_me: int | None = None
    is_empty: int | None = None
    is_delayed: int | None = None
    is_auto_reply: int | None = None
    is_prepared: int | None = None
    is_read: int | None = None
    is_system_message: int | None = None
    is_sent: int | None = None
    has_dd_results: int | None = None
    is_service_message: int | None = None
    is_forward: int | None = None
    was_downgraded: int | None = None
    is_archive: int | None = None
    was_data_detected: int | None = None
    was_deduplicated: int | None = None
    is_audio_message: int | None = None
    is_played: int | None = None
    date_played: AwareDatetime | None = None
    item_type: int | None = None
    other_handle: int | None = None
    other_handle_natural_id: str | None = None
    other_handle_natural_service: str | None = None
    group_title: str | None = None
    group_action_type: int | None = None
    share_status: int | None = None
    share_direction: int | None = None
    is_expirable: int | None = None
    expire_state: int | None = None
    message_action_type: int | None = None
    message_source: int | None = None
    associated_message_guid: str | None = None
    associated_message_type: int | None = None
    balloon_bundle_id: str | None = None
    payload_data: bytes | None = None
    expressive_send_style_id: str | None = None
    associated_message_range_location: int | None = None
    associated_message_range_length: int | None = None
    time_expressive_send_played: AwareDatetime | None = None
    message_summary_info: bytes | None = None
    destination_caller_id: str | None = None
    is_corrupt: int | None = None
    reply_to_guid: str | None = None
    sort_id: int | None = None
    is_spam: int | None = None
    has_unseen_mention: int | None = None
    thread_originator_guid: str | None = None
    thread_originator_part: str | None = None
    syndication_ranges: str | None = None
    was_delivered_quietly: int | None = None
    did_notify_recipient: int | None = None
    synced_syndication_ranges: str | None = None
    date_retracted: AwareDatetime | None = None
    date_edited: AwareDatetime | None = None
    was_detonated: int | None = None
    part_count: int | None = None
    is_stewie: int | None = None
    is_kt_verified: int | None = None
    is_sos: int | None = None
    is_critical: int | None = None
    bia_reference_id: str | None = None
    fallback_hash: str | None = None
    associated_message_emoji: str | None = None
    is_pending_satellite_send: int | None = None
    needs_relay: int | None = None
    schedule_type: int | None = None
    schedule_state: int | None = None
    sent_or_received_off_grid: int | None = None
    date_recovered: AwareDatetime | None = None
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class ChatHandleJoinRow(CtxModel):
    # Link between one chat and one handle. All fields are required.
    # NOTE(review): the integer ids are presumably chat.db row ids, with the
    # guid / natural-id fields carried alongside as stable identifiers -
    # confirm against the snapshot source.

    # Chat side of the link.
    chat_id: int
    chat_guid: str

    # Handle side of the link.
    handle_id: int
    handle_natural_id: str
    handle_natural_service: str
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class ChatMessageJoinRow(CtxModel):
    # Link between one chat and one message. Every field except
    # ``message_date`` is required.
    # NOTE(review): the integer ids are presumably chat.db row ids - confirm
    # against the snapshot source.

    # Chat side of the link.
    chat_id: int
    chat_guid: str

    # Message side of the link.
    message_id: int
    message_guid: str
    message_date: AwareDatetime | None = None
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class MessageAttachmentJoinRow(CtxModel):
    # Link between one message and one attachment. All fields are required.
    # NOTE(review): the integer ids are presumably chat.db row ids - confirm
    # against the snapshot source.

    # Message side of the link.
    message_id: int
    message_guid: str

    # Attachment side of the link.
    attachment_id: int
    attachment_guid: str
|