@pentatonic-ai/ai-agent-sdk 0.10.2 → 0.10.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs
CHANGED
|
@@ -878,7 +878,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
878
878
|
}
|
|
879
879
|
|
|
880
880
|
// src/telemetry.js
|
|
881
|
-
var VERSION = "0.10.2";
|
|
881
|
+
var VERSION = "0.10.3";
|
|
882
882
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
883
883
|
function machineId() {
|
|
884
884
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/dist/index.js
CHANGED
|
@@ -847,7 +847,7 @@ function fireAndForgetEmit(clientConfig, sessionOpts, messages, result, model) {
|
|
|
847
847
|
}
|
|
848
848
|
|
|
849
849
|
// src/telemetry.js
|
|
850
|
-
var VERSION = "0.10.2";
|
|
850
|
+
var VERSION = "0.10.3";
|
|
851
851
|
var TELEMETRY_URL = "https://sdk-telemetry.philip-134.workers.dev";
|
|
852
852
|
function machineId() {
|
|
853
853
|
const raw = typeof process !== "undefined" ? `${process.env?.USER || process.env?.USERNAME || "u"}:${process.platform || "x"}:${process.arch || "x"}` : "browser";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@pentatonic-ai/ai-agent-sdk",
|
|
3
|
-
"version": "0.10.2",
|
|
3
|
+
"version": "0.10.3",
|
|
4
4
|
"description": "TES SDK — LLM observability and lifecycle tracking via Pentatonic Thing Event System. Track token usage, tool calls, and conversations. Manage things through event-sourced lifecycle stages with AI enrichment and vector search.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.cjs",
|
|
@@ -256,3 +256,83 @@ def test_build_event_block_format_contract_truncation() -> None:
|
|
|
256
256
|
})
|
|
257
257
|
content_part = block.split("---\n", 1)[1]
|
|
258
258
|
assert len(content_part) == worker.MAX_CONTENT_CHARS
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
# ----------------------------------------------------------------------
|
|
262
|
+
# clean_content() — HTML/CSS strip so email + doc events don't distil
|
|
263
|
+
# into junk `concept` entities (font-face, mso-font-alt, panose-1, etc.).
|
|
264
|
+
# ----------------------------------------------------------------------
|
|
265
|
+
|
|
266
|
+
def test_clean_content_passthrough_on_plain_text() -> None:
|
|
267
|
+
"""Hot path: events without `<` or `{` skip all regex work."""
|
|
268
|
+
plain = "Hi Phil, can we ship this Thursday? — Carly"
|
|
269
|
+
assert worker.clean_content(plain) == plain
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def test_clean_content_strips_style_block() -> None:
|
|
273
|
+
"""`<style>...</style>` blocks contain raw CSS that would otherwise
|
|
274
|
+
pollute the entity graph with `font-family`, `mso-*` etc."""
|
|
275
|
+
html = (
|
|
276
|
+
"<style>body { font-family: Arial; mso-ascii-font-family: Times; }</style>"
|
|
277
|
+
"<p>Meeting confirmed for Thursday</p>"
|
|
278
|
+
)
|
|
279
|
+
cleaned = worker.clean_content(html)
|
|
280
|
+
assert "font-family" not in cleaned
|
|
281
|
+
assert "mso-" not in cleaned
|
|
282
|
+
assert "Meeting confirmed for Thursday" in cleaned
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def test_clean_content_strips_inline_tags() -> None:
|
|
286
|
+
"""Inline tags removed but the human text between them is kept."""
|
|
287
|
+
html = "<div><b>Acme Corp</b> joined the call</div>"
|
|
288
|
+
cleaned = worker.clean_content(html)
|
|
289
|
+
assert "<" not in cleaned and ">" not in cleaned
|
|
290
|
+
assert "Acme Corp joined the call" in cleaned
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def test_clean_content_decodes_entities_in_markup() -> None:
|
|
294
|
+
"""HTML entities decode after tag-strip so we keep human meaning.
|
|
295
|
+
Only fires when the content was tagged in the first place — pure
|
|
296
|
+
plain text takes the fast-path skip and entities pass through as-is
|
|
297
|
+
(which is fine; plain-text entities are rare and harmless)."""
|
|
298
|
+
html = "<p>Phil &amp; Carly agreed: ship &lt;next week&gt;</p>"
|
|
299
|
+
cleaned = worker.clean_content(html)
|
|
300
|
+
assert cleaned == "Phil & Carly agreed: ship <next week>"
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def test_clean_content_strips_mso_when_with_markup() -> None:
|
|
304
|
+
"""`mso-*` / `panose-1` declarations leak from Outlook exports. The
|
|
305
|
+
fast-path only fires on text without `<` or `{`, so we test the
|
|
306
|
+
common case: mso-tokens alongside HTML that triggers the cleaner.
|
|
307
|
+
Realistic shape — each declaration terminated by `;` like in real
|
|
308
|
+
Outlook CSS leak — so the strip doesn't greedy-match into the body."""
|
|
309
|
+
weird = "<div>mso-font-alt: Arial; panose-1: 2 11 6 4;\nMeeting on Thursday</div>"
|
|
310
|
+
cleaned = worker.clean_content(weird)
|
|
311
|
+
assert "mso-" not in cleaned
|
|
312
|
+
assert "panose-1" not in cleaned
|
|
313
|
+
assert "Meeting on Thursday" in cleaned
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def test_clean_content_fast_path_returns_plain_text_unchanged() -> None:
|
|
317
|
+
"""Documented contract of the fast-path: input with neither `<`
|
|
318
|
+
nor `{` passes through verbatim. Locks in the perf-vs-correctness
|
|
319
|
+
trade-off (most events are plain text; running 7 regexes on each
|
|
320
|
+
one is wasted)."""
|
|
321
|
+
plain = "mso-font-alt should pass through unchanged here"
|
|
322
|
+
assert worker.clean_content(plain) == plain
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
def test_clean_content_preserves_extractable_signal() -> None:
|
|
326
|
+
"""End-to-end: a representative email-shaped event should clean down
|
|
327
|
+
to just the human-readable body."""
|
|
328
|
+
email = (
|
|
329
|
+
"<html><head><style>"
|
|
330
|
+
"@font-face { font-family: 'Calibri'; panose-1: 2 15 5 2; }"
|
|
331
|
+
"</style></head><body>"
|
|
332
|
+
"<p>Phil — I've confirmed the SAFE amendments for Thursday.</p>"
|
|
333
|
+
"<p>— Timothy</p></body></html>"
|
|
334
|
+
)
|
|
335
|
+
cleaned = worker.clean_content(email)
|
|
336
|
+
assert "Calibri" not in cleaned and "panose-1" not in cleaned
|
|
337
|
+
assert "I've confirmed the SAFE amendments" in cleaned
|
|
338
|
+
assert "Timothy" in cleaned
|
|
@@ -143,12 +143,16 @@ nothing to extract, emit ONLY the header.
|
|
|
143
143
|
ENT|person|Alex Wong|alex@example.com
|
|
144
144
|
ENT|person|Acme Corp (org, no email)
|
|
145
145
|
ENT|person|Sam Patel (person, email not visible)
|
|
146
|
-
- FCT lines have exactly 6 fields: `FCT`, category, subject, \
|
|
147
|
-
predicate, object, statement.
|
|
146
|
+
- FCT lines have EXACTLY 6 pipe-separated fields: `FCT`, category, subject, \
|
|
147
|
+
predicate, object, statement. COUNT THE PIPES: there must be 6 `|` segments. \
|
|
148
|
+
predicate and object are SEPARATE fields — NEVER merge them into the statement, \
|
|
149
|
+
and NEVER drop a field.
|
|
148
150
|
category ∈ {decision, commitment, state, mention, observation, preference}
|
|
149
151
|
subject MUST be an entity name declared in THIS event's ENT lines.
|
|
152
|
+
predicate is a short verb phrase (e.g. "agreed to", "owns", "works at").
|
|
150
153
|
object MAY be an entity name OR a literal string OR `-` if absent.
|
|
151
|
-
statement ≤ 140 characters.
|
|
154
|
+
statement ≤ 140 characters, a self-contained sentence.
|
|
155
|
+
WORKED EXAMPLE: `FCT|commitment|Timothy Bradley|agreed to|SAFE amendments|Timothy confirmed the SAFE amendments are set (14 May 2026)`
|
|
152
156
|
- REL lines have exactly 4 fields: `REL`, from, to, rel_type.
|
|
153
157
|
from and to MUST be entity names declared in THIS event's ENT lines.
|
|
154
158
|
rel_type is a short verb / preposition phrase.
|
|
@@ -167,10 +171,59 @@ A whole file is one entity, not twenty.
|
|
|
167
171
|
SYSTEM_PROMPT_HASH = hashlib.sha256(BATCH_SYSTEM_PROMPT.encode()).hexdigest()[:16]
|
|
168
172
|
|
|
169
173
|
|
|
174
|
+
# --------------------------------------------------------------------
|
|
175
|
+
# Content cleaner — strip HTML/CSS so email + doc styling never reaches
|
|
176
|
+
# the LLM as text to extract. Without this, events containing Outlook /
|
|
177
|
+
# Gmail / docx-export markup get distilled into junk concept entities
|
|
178
|
+
# (`font-face`, `mso-font-alt`, `panose-1`, `src`) that pollute the
|
|
179
|
+
# graph. clean_content() is a no-op fast path on plain text — only
|
|
180
|
+
# events whose body contains `<` or `{` pay the regex cost.
|
|
181
|
+
# --------------------------------------------------------------------
|
|
182
|
+
|
|
183
|
+
_CC_STYLE = re.compile(r"<(style|script)\b[^>]*>.*?</\1>", re.IGNORECASE | re.DOTALL)
|
|
184
|
+
_CC_CSSRULE = re.compile(r"[.#@]?[A-Za-z0-9_.:#> -]+\s*\{[^{}]*\}")
|
|
185
|
+
_CC_MSO = re.compile(r"\b(mso-[\w-]+|panose-1|font-family|font-face)\b[^;\n]*;?", re.IGNORECASE)
|
|
186
|
+
_CC_TAG = re.compile(r"<[^>]+>")
|
|
187
|
+
_CC_WS = re.compile(r"[ \t\r\f]+")
|
|
188
|
+
_CC_NL = re.compile(r"\n{3,}")
|
|
189
|
+
_CC_ENT = (
|
|
190
|
+
("&nbsp;", " "), ("&amp;", "&"), ("&lt;", "<"),
|
|
191
|
+
("&gt;", ">"), ("&quot;", '"'), ("&#39;", "'"), ("&apos;", "'"),
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def clean_content(text: str) -> str:
|
|
196
|
+
"""Strip HTML/CSS so email + doc styling doesn't distil into junk
|
|
197
|
+
`concept` entities (font-face, mso-font-alt, etc.).
|
|
198
|
+
|
|
199
|
+
Fast early return on plain text (no `<` or `{`). On marked-up
|
|
200
|
+
content, removes `<style>` / `<script>` blocks first, then
|
|
201
|
+
standalone CSS rules, then all remaining tags, then MS-Office /
|
|
202
|
+
panose / font-face property runs that leak as freestanding tokens
|
|
203
|
+
in some Outlook exports. HTML entities are decoded last so we
|
|
204
|
+
don't accidentally introduce `<` tags from `&lt;` after the tag
|
|
205
|
+
pass."""
|
|
206
|
+
if not text or ("<" not in text and "{" not in text):
|
|
207
|
+
return text
|
|
208
|
+
t = _CC_STYLE.sub(" ", text)
|
|
209
|
+
t = _CC_CSSRULE.sub(" ", t)
|
|
210
|
+
t = _CC_TAG.sub(" ", t)
|
|
211
|
+
t = _CC_MSO.sub(" ", t)
|
|
212
|
+
for a, b in _CC_ENT:
|
|
213
|
+
t = t.replace(a, b)
|
|
214
|
+
t = _CC_WS.sub(" ", t)
|
|
215
|
+
t = _CC_NL.sub("\n\n", t)
|
|
216
|
+
return t.strip()
|
|
217
|
+
|
|
218
|
+
|
|
170
219
|
def build_event_block(idx: int, event: dict[str, Any]) -> str:
|
|
171
|
-
"""Render one event as `[event K]\nheader\n---\ncontent` block.
|
|
220
|
+
"""Render one event as `[event K]\nheader\n---\ncontent` block.
|
|
221
|
+
|
|
222
|
+
Content is passed through `clean_content()` before truncation so
|
|
223
|
+
that the MAX_CONTENT_CHARS slice doesn't end up containing pure
|
|
224
|
+
HTML markup with no extractable signal."""
|
|
172
225
|
src = event.get("source_kind", "unknown")
|
|
173
|
-
content = (event.get("content") or "")[:MAX_CONTENT_CHARS]
|
|
226
|
+
content = clean_content(event.get("content") or "")[:MAX_CONTENT_CHARS]
|
|
174
227
|
attrs = event.get("attributes") or {}
|
|
175
228
|
when = attrs.get("emitted_at") or attrs.get("timestamp")
|
|
176
229
|
author = attrs.get("author") or attrs.get("user_id")
|