@ahtmljs/next 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/extractors/data-attrs.d.ts +26 -0
- package/dist/extractors/data-attrs.d.ts.map +1 -0
- package/dist/extractors/data-attrs.js +163 -0
- package/dist/extractors/data-attrs.js.map +1 -0
- package/dist/extractors/index.d.ts +12 -0
- package/dist/extractors/index.d.ts.map +1 -0
- package/dist/extractors/index.js +12 -0
- package/dist/extractors/index.js.map +1 -0
- package/dist/extractors/merge.d.ts +13 -0
- package/dist/extractors/merge.d.ts.map +1 -0
- package/dist/extractors/merge.js +25 -0
- package/dist/extractors/merge.js.map +1 -0
- package/dist/extractors/opengraph.d.ts +7 -0
- package/dist/extractors/opengraph.d.ts.map +1 -0
- package/dist/extractors/opengraph.js +89 -0
- package/dist/extractors/opengraph.js.map +1 -0
- package/dist/extractors/schema-org.d.ts +9 -0
- package/dist/extractors/schema-org.d.ts.map +1 -0
- package/dist/extractors/schema-org.js +104 -0
- package/dist/extractors/schema-org.js.map +1 -0
- package/dist/handler.d.ts +43 -0
- package/dist/handler.d.ts.map +1 -0
- package/dist/handler.js +139 -0
- package/dist/handler.js.map +1 -0
- package/dist/index.d.ts +50 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +48 -0
- package/dist/index.js.map +1 -0
- package/dist/llms-txt.d.ts +42 -0
- package/dist/llms-txt.d.ts.map +1 -0
- package/dist/llms-txt.js +88 -0
- package/dist/llms-txt.js.map +1 -0
- package/dist/mcp.d.ts +30 -0
- package/dist/mcp.d.ts.map +1 -0
- package/dist/mcp.js +64 -0
- package/dist/mcp.js.map +1 -0
- package/dist/openapi.d.ts +13 -0
- package/dist/openapi.d.ts.map +1 -0
- package/dist/openapi.js +74 -0
- package/dist/openapi.js.map +1 -0
- package/dist/policy.d.ts +24 -0
- package/dist/policy.d.ts.map +1 -0
- package/dist/policy.js +79 -0
- package/dist/policy.js.map +1 -0
- package/dist/well-known.d.ts +40 -0
- package/dist/well-known.d.ts.map +1 -0
- package/dist/well-known.js +56 -0
- package/dist/well-known.js.map +1 -0
- package/package.json +33 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract entities and actions from inline `data-ahtml-*` attributes.
|
|
3
|
+
*
|
|
4
|
+
* This is the Level-1 adoption path: zero tooling change, just sprinkle
|
|
5
|
+
* data attributes on your existing markup.
|
|
6
|
+
*
|
|
7
|
+
* <article
|
|
8
|
+
* data-ahtml="product"
|
|
9
|
+
* data-ahtml-id="product:mbp-14"
|
|
10
|
+
* data-ahtml-name="MacBook Pro 14"
|
|
11
|
+
* data-ahtml-price="1999 USD"
|
|
12
|
+
* data-ahtml-stock="in_stock (42)"
|
|
13
|
+
* >
|
|
14
|
+
* ...
|
|
15
|
+
* <button
|
|
16
|
+
* data-ahtml-action="purchase"
|
|
17
|
+
* data-ahtml-action-auth="required"
|
|
18
|
+
* data-ahtml-action-cost="1999 USD purchase"
|
|
19
|
+
* data-ahtml-action-reversible="P30D full_refund"
|
|
20
|
+
* data-ahtml-action-target="product:mbp-14"
|
|
21
|
+
* >Buy now</button>
|
|
22
|
+
* </article>
|
|
23
|
+
*/
|
|
24
|
+
import type { Extraction } from './merge.js';
|
|
25
|
+
/**
 * Scan an HTML string for elements carrying `data-ahtml` / `data-ahtml-*`
 * attributes and convert them into entities and actions.
 *
 * @param html - HTML markup to scan.
 * @returns An extraction whose `source` is `'data-attrs'`.
 */
export declare function extractFromDataAttrs(html: string): Extraction;
|
|
26
|
+
//# sourceMappingURL=data-attrs.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data-attrs.d.ts","sourceRoot":"","sources":["../../src/extractors/data-attrs.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE7C,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,CAiB7D"}
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract entities and actions from inline `data-ahtml-*` attributes.
|
|
3
|
+
*
|
|
4
|
+
* This is the Level-1 adoption path: zero tooling change, just sprinkle
|
|
5
|
+
* data attributes on your existing markup.
|
|
6
|
+
*
|
|
7
|
+
* <article
|
|
8
|
+
* data-ahtml="product"
|
|
9
|
+
* data-ahtml-id="product:mbp-14"
|
|
10
|
+
* data-ahtml-name="MacBook Pro 14"
|
|
11
|
+
* data-ahtml-price="1999 USD"
|
|
12
|
+
* data-ahtml-stock="in_stock (42)"
|
|
13
|
+
* >
|
|
14
|
+
* ...
|
|
15
|
+
* <button
|
|
16
|
+
* data-ahtml-action="purchase"
|
|
17
|
+
* data-ahtml-action-auth="required"
|
|
18
|
+
* data-ahtml-action-cost="1999 USD purchase"
|
|
19
|
+
* data-ahtml-action-reversible="P30D full_refund"
|
|
20
|
+
* data-ahtml-action-target="product:mbp-14"
|
|
21
|
+
* >Buy now</button>
|
|
22
|
+
* </article>
|
|
23
|
+
*/
|
|
24
|
+
/**
 * Collect the entities and actions declared through `data-ahtml` /
 * `data-ahtml-*` attributes in an HTML string.
 *
 * Every opening tag that carries at least one such attribute is parsed once;
 * the same attribute map is offered to both the entity and the action
 * builders, so a single element may contribute to either list (or both).
 *
 * @param html HTML markup to scan.
 * @returns `{ source: 'data-attrs', entities, actions }`.
 */
export function extractFromDataAttrs(html) {
    const found = { source: 'data-attrs', entities: [], actions: [] };
    // Naive tag scan — adequate for the plugin's compile-time pass.
    // Runtime DOM consumers should use the agent-side parser instead.
    const tagPattern = /<(\w+)\b([^>]*?data-ahtml(?:-[\w-]+)?=[^>]*)>/gi;
    for (let hit = tagPattern.exec(html); hit !== null; hit = tagPattern.exec(html)) {
        const attrMap = parseAttrs(hit[2]);
        const entity = entityFromAttrs(attrMap);
        if (entity) {
            found.entities.push(entity);
        }
        const action = actionFromAttrs(attrMap);
        if (action) {
            found.actions.push(action);
        }
    }
    return found;
}
|
|
42
|
+
/**
 * Build an entity from one element's attribute map.
 *
 * The `data-ahtml` attribute selects the entity type. Supported values are
 * `product`, `document` (alias `article`) and `task`; anything else, or a
 * missing/empty value, yields `null`.
 *
 * The id comes from `data-ahtml-id` when present; otherwise it is
 * `<type>:<slug>` where the slug is derived from `data-ahtml-name`, falling
 * back to the type itself.
 *
 * @param attrs Lower-cased attribute name -> value map.
 * @returns The entity object, or `null` when the element declares none.
 */
function entityFromAttrs(attrs) {
    const kind = attrs['data-ahtml'];
    if (!kind) {
        return null;
    }
    const label = attrs['data-ahtml-name'];
    const id = attrs['data-ahtml-id'] ?? `${kind}:${slug(label ?? kind)}`;
    switch (kind) {
        case 'product': {
            const brand = attrs['data-ahtml-brand'];
            const description = attrs['data-ahtml-description'];
            const price = parseMoney(attrs['data-ahtml-price']);
            const stock = parseStock(attrs['data-ahtml-stock']);
            const sku = attrs['data-ahtml-sku'];
            // Optional fields are only emitted when they carry a value.
            return {
                id,
                type: 'product',
                name: label ?? '',
                ...(brand && { brand }),
                ...(description && { description }),
                ...(price && { price }),
                ...(stock && { stock }),
                ...(sku && { sku }),
            };
        }
        case 'document':
        case 'article': {
            const author = attrs['data-ahtml-author'];
            const published = attrs['data-ahtml-published'];
            const summary = attrs['data-ahtml-summary'];
            return {
                id,
                type: 'document',
                title: attrs['data-ahtml-title'] ?? label ?? '',
                ...(author && { author }),
                ...(published && { published_at: published }),
                ...(summary && { summary }),
            };
        }
        case 'task': {
            const priority = attrs['data-ahtml-priority'];
            const assignee = attrs['data-ahtml-assignee'];
            return {
                id,
                type: 'task',
                title: attrs['data-ahtml-title'] ?? label ?? '',
                // Tasks without an explicit state are treated as open.
                state: attrs['data-ahtml-state'] ?? 'open',
                ...(priority && { priority }),
                ...(assignee && { assignee }),
            };
        }
        default:
            return null;
    }
}
|
|
92
|
+
/**
 * Build an action from one element's `data-ahtml-action*` attributes.
 *
 * Recognised attributes (all optional except the first):
 *  - `data-ahtml-action`              -> `id` (required; otherwise `null`)
 *  - `data-ahtml-action-target`       -> `target`
 *  - `data-ahtml-action-auth`         -> `auth`
 *  - `data-ahtml-action-cost`         -> `cost`, format `"<amount> <currency> <category>"`
 *  - `data-ahtml-action-reversible`   -> `reversible`, either `"no"` or `"<window> [policy…]"`
 *  - `data-ahtml-action-side-effects` -> `side_effects`, comma-separated list
 *  - `data-ahtml-action-confirmation` -> `confirmation`
 *  - `data-ahtml-action-method`       -> `method`
 *  - `data-ahtml-action-execute`      -> `execute_url`
 *
 * Malformed values are dropped rather than emitted half-parsed: a cost whose
 * amount is not a finite number is omitted, blank side-effect entries are
 * removed, and surrounding whitespace is ignored for cost and reversibility.
 *
 * @param attrs Lower-cased attribute name -> value map.
 * @returns The action object, or `null` when no action id is declared.
 */
function actionFromAttrs(attrs) {
    const id = attrs['data-ahtml-action'];
    if (!id)
        return null;
    const target = attrs['data-ahtml-action-target'];
    const auth = attrs['data-ahtml-action-auth'];
    const confirmation = attrs['data-ahtml-action-confirmation'];
    const method = attrs['data-ahtml-action-method'];
    const executeUrl = attrs['data-ahtml-action-execute'];

    let cost;
    const rawCost = attrs['data-ahtml-action-cost'];
    if (rawCost) {
        const m = rawCost.trim().match(/^([\d.]+)\s+(\w+)\s+(\w+)$/);
        // `[\d.]+` also matches "." or "1.2.3"; Number() rejects those as NaN
        // instead of silently truncating the way parseFloat would.
        const amount = m ? Number(m[1]) : NaN;
        if (m && Number.isFinite(amount))
            cost = { amount, currency: m[2], category: m[3] };
    }

    let reversible;
    const rawReversible = (attrs['data-ahtml-action-reversible'] ?? '').trim();
    if (rawReversible === 'no') {
        reversible = { reversible: false };
    }
    else if (rawReversible) {
        // First token is the window (e.g. "P30D"); the rest is the policy.
        const [win, ...rest] = rawReversible.split(/\s+/);
        reversible = { reversible: true, window: win, policy: rest.join(' ') || undefined };
    }

    let sideEffects;
    const rawSideEffects = attrs['data-ahtml-action-side-effects'];
    if (rawSideEffects) {
        const list = rawSideEffects.split(',').map((s) => s.trim()).filter(Boolean);
        if (list.length)
            sideEffects = list;
    }

    // Spread order fixes the emitted key order; empty values are skipped.
    return {
        id,
        ...(target && { target }),
        ...(auth && { auth }),
        ...(cost && { cost }),
        ...(reversible && { reversible }),
        ...(sideEffects && { side_effects: sideEffects }),
        ...(confirmation && { confirmation }),
        ...(method && { method }),
        ...(executeUrl && { execute_url: executeUrl }),
    };
}
|
|
131
|
+
/**
 * Parse the attribute portion of an opening tag into a name -> value map.
 *
 * Values may be double-quoted, single-quoted or bare (unquoted, up to the
 * next whitespace). Attribute names are lower-cased; attributes without an
 * `=` are ignored. When a name repeats, the last occurrence wins.
 *
 * @param s Raw text between the tag name and the closing `>`.
 * @returns Plain object mapping lower-cased attribute names to their values.
 */
function parseAttrs(s) {
    const attrs = {};
    const pairPattern = /(\w[\w-]*)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/g;
    for (let hit = pairPattern.exec(s); hit !== null; hit = pairPattern.exec(s)) {
        const [, rawName, , doubleQuoted, singleQuoted, bare] = hit;
        attrs[rawName.toLowerCase()] = doubleQuoted ?? singleQuoted ?? bare ?? '';
    }
    return attrs;
}
|
|
140
|
+
/**
 * Parse a `"<amount> <currency>"` string such as `"1999 USD"`.
 *
 * Surrounding whitespace is ignored. The amount must be a finite decimal
 * number: inputs like `". USD"` or `"1.2.3 USD"` pass the character-class
 * check but are not valid numbers, so they are rejected instead of producing
 * `NaN` or a silently truncated value.
 *
 * @param s Attribute value, possibly `undefined` or empty.
 * @returns `{ amount, currency }`, or `null` when the value is absent or malformed.
 */
function parseMoney(s) {
    if (!s)
        return null;
    const m = s.trim().match(/^([\d.]+)\s+(\w+)$/);
    if (!m)
        return null;
    // Number() is strict about the whole token, unlike parseFloat.
    const amount = Number(m[1]);
    if (!Number.isFinite(amount))
        return null;
    return { amount, currency: m[2] };
}
|
|
146
|
+
/**
 * Parse a stock descriptor of the form `"<status>"` or `"<status> (<quantity>)"`,
 * e.g. `"in_stock (42)"`.
 *
 * @param s Attribute value, possibly `undefined` or empty.
 * @returns `{ status }` or `{ status, quantity }`; `null` when the value is
 *          absent or does not match the expected shape.
 */
function parseStock(s) {
    if (!s) {
        return null;
    }
    const hit = s.match(/^(\w+)\s*(?:\((\d+)\))?$/);
    if (hit === null) {
        return null;
    }
    const [, status, rawQuantity] = hit;
    if (rawQuantity) {
        return { status, quantity: parseInt(rawQuantity, 10) };
    }
    return { status };
}
|
|
156
|
+
/**
 * Turn free text into an id-safe slug: lower-case, runs of characters other
 * than `a-z0-9` collapsed to a single dash, leading/trailing dashes removed,
 * then cut to at most 64 characters. Returns `'unknown'` when nothing is left.
 *
 * @param s Text to slugify.
 * @returns The slug, or `'unknown'` for input without any usable characters.
 */
function slug(s) {
    const dashed = s.toLowerCase().replace(/[^a-z0-9]+/g, '-');
    const trimmed = dashed.replace(/^-+|-+$/g, '');
    const capped = trimmed.slice(0, 64);
    return capped || 'unknown';
}
|
|
163
|
+
//# sourceMappingURL=data-attrs.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"data-attrs.js","sourceRoot":"","sources":["../../src/extractors/data-attrs.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;GAsBG;AAKH,MAAM,UAAU,oBAAoB,CAAC,IAAY;IAC/C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,kEAAkE;IAClE,6DAA6D;IAC7D,MAAM,SAAS,GAAG,iDAAiD,CAAC;IACpE,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QACnC,IAAI,GAAG;YAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC5B,MAAM,GAAG,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;QACnC,IAAI,GAAG;YAAE,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAC7B,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;AACrD,CAAC;AAED,SAAS,eAAe,CAAC,KAA6B;IACpD,MAAM,IAAI,GAAG,KAAK,CAAC,YAAY,CAAC,CAAC;IACjC,IAAI,CAAC,IAAI;QAAE,OAAO,IAAI,CAAC;IACvB,MAAM,EAAE,GAAG,KAAK,CAAC,eAAe,CAAC,IAAI,GAAG,IAAI,IAAI,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;IACzF,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACvB,MAAM,CAAC,GAAY,EAAE,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE,EAAE,CAAC;QACjF,IAAI,KAAK,CAAC,kBAAkB,CAAC;YAAE,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC,kBAAkB,CAAE,CAAC;QACpE,IAAI,KAAK,CAAC,wBAAwB,CAAC;YAAE,CAAC,CAAC,WAAW,GAAG,KAAK,CAAC,wBAAwB,CAAE,CAAC;QACtF,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACpD,IAAI,KAAK;YAAE,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC;QAC3B,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAC;QACpD,IAAI,KAAK;YAAE,CAAC,CAAC,KAAK,GAAG,KAAK,CAAC;QAC3B,IAAI,KAAK,CAAC,gBAAgB,CAAC;YAAE,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,gBAAgB,CAAE,CAAC;QAC9D,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,IAAI,KAAK,UAAU,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QAC9C,MAAM,CAAC,GAAa;YAClB,EAAE;YACF,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,KAAK,CAAC,kBAAkB,CAAC,IAAI,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE;SACnE,CAAC;QACF,IAAI,KAAK,CAAC,mBAAmB,CAAC;YAAE,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,mBAAmB,CAAE,CAAC;QACvE,IAAI,KAAK,CAAC,sBAAsB,CAAC;YAAE,CAAC,CAAC,YAAY,GAAG,KAAK,CAAC,sBAAsB,
CAAE,CAAC;QACnF,IAAI,KAAK,CAAC,oBAAoB,CAAC;YAAE,CAAC,CAAC,OAAO,GAAG,KAAK,CAAC,oBAAoB,CAAE,CAAC;QAC1E,OAAO,CAAC,CAAC;IACX,CAAC;IACD,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;QACpB,MAAM,CAAC,GAAS;YACd,EAAE;YACF,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,KAAK,CAAC,kBAAkB,CAAC,IAAI,KAAK,CAAC,iBAAiB,CAAC,IAAI,EAAE;YAClE,KAAK,EAAE,CAAC,KAAK,CAAC,kBAAkB,CAAC,IAAI,MAAM,CAAkB;SAC9D,CAAC;QACF,IAAI,KAAK,CAAC,qBAAqB,CAAC;YAAE,CAAC,CAAC,QAAQ,GAAG,KAAK,CAAC,qBAAqB,CAAqB,CAAC;QAChG,IAAI,KAAK,CAAC,qBAAqB,CAAC;YAAE,CAAC,CAAC,QAAQ,GAAG,KAAK,CAAC,qBAAqB,CAAE,CAAC;QAC7E,OAAO,CAAC,CAAC;IACX,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,SAAS,eAAe,CAAC,KAA6B;IACpD,MAAM,EAAE,GAAG,KAAK,CAAC,mBAAmB,CAAC,CAAC;IACtC,IAAI,CAAC,EAAE;QAAE,OAAO,IAAI,CAAC;IACrB,MAAM,CAAC,GAAW,EAAE,EAAE,EAAE,CAAC;IACzB,IAAI,KAAK,CAAC,0BAA0B,CAAC;QAAE,CAAC,CAAC,MAAM,GAAG,KAAK,CAAC,0BAA0B,CAAE,CAAC;IACrF,MAAM,IAAI,GAAG,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAC7C,IAAI,IAAI;QAAE,CAAC,CAAC,IAAI,GAAG,IAAsB,CAAC;IAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,wBAAwB,CAAC,CAAC;IAC7C,IAAI,IAAI,EAAE,CAAC;QACT,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAC;QACnD,IAAI,CAAC;YAAE,CAAC,CAAC,IAAI,GAAG,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAE,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAgB,EAAE,CAAC;IAChG,CAAC;IACD,MAAM,GAAG,GAAG,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAClD,IAAI,GAAG,EAAE,CAAC;QACR,IAAI,GAAG,KAAK,IAAI;YAAE,CAAC,CAAC,UAAU,GAAG,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;aAClD,CAAC;YACJ,MAAM,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACtC,CAAC,CAAC,UAAU,GAAG,EAAE,UAAU,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,EAAE,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,SAAS,EAAE,CAAC;QACtF,CAAC;IACH,CAAC;IACD,MAAM,EAAE,GAAG,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACnD,IAAI,EAAE;QAAE,CAAC,CAAC,YAAY,GAAG,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IAC5D,MAAM,IAAI,GAAG,KAAK,CAAC,gCAAgC,CAAC,CAAC;IACrD,IAAI,IAAI;QAAE,CAAC,CAAC,YAAY,GAAG,IAA8B,CAAC;IAC1D,MAAM,MAAM,GAAG,KAAK,CAAC,0BAA0B,CAAC,CAAC;IACjD,IAAI,MAAM;QAAE,CAAC,
CAAC,MAAM,GAAG,MAA0B,CAAC;IAClD,MAAM,IAAI,GAAG,KAAK,CAAC,2BAA2B,CAAC,CAAC;IAChD,IAAI,IAAI;QAAE,CAAC,CAAC,WAAW,GAAG,IAAI,CAAC;IAC/B,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,UAAU,CAAC,CAAS;IAC3B,MAAM,CAAC,GAA2B,EAAE,CAAC;IACrC,MAAM,EAAE,GAAG,+CAA+C,CAAC;IAC3D,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,UAAU,CAAC,CAAqB;IACvC,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;IACxC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,CAAC,CAAC,CAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;AACnE,CAAC;AAED,SAAS,UAAU,CAAC,CAAqB;IACvC,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;IAC9C,IAAI,CAAC,CAAC;QAAE,OAAO,IAAI,CAAC;IACpB,OAAO,CAAC,CAAC,CAAC,CAAC;QACT,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAqB,EAAE,QAAQ,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAE,EAAE,EAAE,CAAC,EAAE;QACrE,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,CAAqB,EAAE,CAAC;AAC3C,CAAC;AAED,SAAS,IAAI,CAAC,CAAS;IACrB,OAAO,CAAC;SACL,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;SACvB,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,SAAS,CAAC;AAC/B,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-extractors.
|
|
3
|
+
*
|
|
4
|
+
* Run a developer's existing HTML through these to produce a Level-0
|
|
5
|
+
* snapshot with no annotation work. Combined output of multiple
|
|
6
|
+
* extractors is merged by precedence: data-attrs > schema.org > OpenGraph.
|
|
7
|
+
*/
|
|
8
|
+
export { extractFromSchemaOrg } from './schema-org.js';
|
|
9
|
+
export { extractFromOpenGraph } from './opengraph.js';
|
|
10
|
+
export { extractFromDataAttrs } from './data-attrs.js';
|
|
11
|
+
export { mergeExtractions, type Extraction } from './merge.js';
|
|
12
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/extractors/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,gBAAgB,EAAE,KAAK,UAAU,EAAE,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Auto-extractors.
|
|
3
|
+
*
|
|
4
|
+
* Run a developer's existing HTML through these to produce a Level-0
|
|
5
|
+
* snapshot with no annotation work. Combined output of multiple
|
|
6
|
+
* extractors is merged by precedence: data-attrs > schema.org > OpenGraph.
|
|
7
|
+
*/
|
|
8
|
+
export { extractFromSchemaOrg } from './schema-org.js';
|
|
9
|
+
export { extractFromOpenGraph } from './opengraph.js';
|
|
10
|
+
export { extractFromDataAttrs } from './data-attrs.js';
|
|
11
|
+
export { mergeExtractions } from './merge.js';
|
|
12
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/extractors/index.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,oBAAoB,EAAE,MAAM,gBAAgB,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAC;AACvD,OAAO,EAAE,gBAAgB,EAAmB,MAAM,YAAY,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import type { Entity, Action } from '@ahtmljs/schema';
|
|
2
|
+
/**
 * Result of running one extractor over a page, and also the shape returned
 * by `mergeExtractions`.
 *
 * - `source`: name of the extractor that produced the data.
 * - `page_type`: optional; when merging, the first non-empty value is kept.
 * - `entities` / `actions`: extracted items, typed by `@ahtmljs/schema`.
 */
export interface Extraction {
|
|
3
|
+
source: 'data-attrs' | 'schema-org' | 'opengraph' | 'route-metadata';
|
|
4
|
+
page_type?: string;
|
|
5
|
+
entities: Entity[];
|
|
6
|
+
actions: Action[];
|
|
7
|
+
}
|
|
8
|
+
/**
|
|
9
|
+
* Merge extractions in precedence order. Earlier entries override later ones
|
|
10
|
+
* on conflict (so pass data-attrs FIRST, opengraph LAST).
|
|
11
|
+
*/
|
|
12
|
+
/**
 * Merge extractions in precedence order: items sharing an `id` are combined
 * field by field, and on conflict the value from the earlier extraction wins.
 *
 * @param extractions - Extractions ordered from highest to lowest precedence.
 * @returns A single extraction; its `source` is always `'data-attrs'`.
 */
export declare function mergeExtractions(extractions: Extraction[]): Extraction;
|
|
13
|
+
//# sourceMappingURL=merge.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"merge.d.ts","sourceRoot":"","sources":["../../src/extractors/merge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,iBAAiB,CAAC;AAEtD,MAAM,WAAW,UAAU;IACzB,MAAM,EAAE,YAAY,GAAG,YAAY,GAAG,WAAW,GAAG,gBAAgB,CAAC;IACrE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,OAAO,EAAE,MAAM,EAAE,CAAC;CACnB;AAED;;;GAGG;AACH,wBAAgB,gBAAgB,CAAC,WAAW,EAAE,UAAU,EAAE,GAAG,UAAU,CAkBtE"}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Merge extractions in precedence order. Earlier entries override later ones
|
|
3
|
+
* on conflict (so pass data-attrs FIRST, opengraph LAST).
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Combine several extractions into one.
 *
 * Entities and actions are keyed by `id`. The first time an id is seen the
 * item is stored as-is; when the same id shows up again in a later
 * extraction, the later item only contributes fields the earlier one lacks.
 * `page_type` is taken from the first extraction that provides one. The
 * result's `source` is always `'data-attrs'`.
 *
 * @param extractions Extractions ordered from highest to lowest precedence.
 * @returns The merged extraction.
 */
export function mergeExtractions(extractions) {
    const entityIndex = new Map();
    const actionIndex = new Map();
    let pageType;
    // Later items fill gaps only: earlier fields are spread last so they win.
    const absorb = (index, item) => {
        const earlier = index.get(item.id);
        index.set(item.id, earlier ? { ...item, ...earlier } : item);
    };
    for (const extraction of extractions) {
        if (!pageType && extraction.page_type) {
            pageType = extraction.page_type;
        }
        for (const entity of extraction.entities) {
            absorb(entityIndex, entity);
        }
        for (const action of extraction.actions) {
            absorb(actionIndex, action);
        }
    }
    return {
        source: 'data-attrs',
        entities: Array.from(entityIndex.values()),
        actions: Array.from(actionIndex.values()),
        ...(pageType && { page_type: pageType }),
    };
}
|
|
25
|
+
//# sourceMappingURL=merge.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"merge.js","sourceRoot":"","sources":["../../src/extractors/merge.ts"],"names":[],"mappings":"AASA;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,WAAyB;IACxD,MAAM,MAAM,GAAe,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAC/E,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC7C,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAC7C,KAAK,MAAM,EAAE,IAAI,WAAW,EAAE,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,SAAS,IAAI,EAAE,CAAC,SAAS;YAAE,MAAM,CAAC,SAAS,GAAG,EAAE,CAAC,SAAS,CAAC;QACvE,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,QAAQ,EAAE,CAAC;YAC5B,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAClC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC,CAAE,EAAE,GAAG,CAAC,EAAE,GAAG,IAAI,EAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,CAAC;QACD,KAAK,MAAM,CAAC,IAAI,EAAE,CAAC,OAAO,EAAE,CAAC;YAC3B,MAAM,IAAI,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;YAClC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC,CAAE,EAAE,GAAG,CAAC,EAAE,GAAG,IAAI,EAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACjE,CAAC;IACH,CAAC;IACD,MAAM,CAAC,QAAQ,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IAC3C,MAAM,CAAC,OAAO,GAAG,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,CAAC;IAC1C,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract a fallback entity from OpenGraph + Twitter card meta tags.
|
|
3
|
+
* Lower-precedence than schema.org, but covers sites that only ship OG.
|
|
4
|
+
*/
|
|
5
|
+
import type { Extraction } from './merge.js';
|
|
6
|
+
/**
 * Build at most one fallback entity from the page's `<meta>` tags
 * (OpenGraph, with Twitter-card values as a fallback for title, description and image).
 *
 * @param html - HTML markup to scan.
 * @returns An extraction whose `source` is `'opengraph'`; `actions` is always empty.
 */
export declare function extractFromOpenGraph(html: string): Extraction;
|
|
7
|
+
//# sourceMappingURL=opengraph.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"opengraph.d.ts","sourceRoot":"","sources":["../../src/extractors/opengraph.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE7C,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,CA4C7D"}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract a fallback entity from OpenGraph + Twitter card meta tags.
|
|
3
|
+
* Lower-precedence than schema.org, but covers sites that only ship OG.
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Build a fallback entity from OpenGraph / Twitter-card `<meta>` tags.
 *
 * The entity type follows `og:type` (default `website`):
 *  - `product`             -> a product entity; the price is read from
 *                             `product:price:*` or `og:price:*` (currency
 *                             defaults to `USD`)
 *  - anything starting with `article` -> a document entity with the
 *                             `article:*` dates and author
 *  - otherwise, when a title exists -> a plain document entity
 *
 * A price is only emitted when its amount parses to a finite number, so a
 * value such as `"$19"` or `"1,999"` no longer yields `amount: NaN`.
 *
 * @param html HTML markup to scan.
 * @returns `{ source: 'opengraph', entities, actions: [] }` with zero or one entity.
 */
export function extractFromOpenGraph(html) {
    const meta = readMeta(html);
    if (!meta.size)
        return { source: 'opengraph', entities: [], actions: [] };
    const ogType = meta.get('og:type') ?? 'website';
    const title = meta.get('og:title') ?? meta.get('twitter:title') ?? '';
    const description = meta.get('og:description') ?? meta.get('twitter:description') ?? '';
    const url = meta.get('og:url') ?? '';
    const image = meta.get('og:image') ?? meta.get('twitter:image');
    const entities = [];
    if (ogType === 'product') {
        const rawPrice = meta.get('product:price:amount') ?? meta.get('og:price:amount');
        const currency = meta.get('product:price:currency') ?? meta.get('og:price:currency') ?? 'USD';
        // Blank or non-numeric prices are treated as "no price".
        const amount = rawPrice && rawPrice.trim() ? Number(rawPrice) : NaN;
        entities.push({
            id: `product:${slug(title || url)}`,
            type: 'product',
            name: title || 'product',
            ...(description && { description }),
            ...(Number.isFinite(amount) && { price: { amount, currency } }),
            ...(image && { images: [{ url: image }] }),
        });
    }
    else if (ogType.startsWith('article')) {
        // Covers plain "article" as well as namespaced variants.
        const publishedAt = meta.get('article:published_time');
        const modifiedAt = meta.get('article:modified_time');
        const author = meta.get('article:author');
        entities.push({
            id: `document:${slug(title || url)}`,
            type: 'document',
            title: title || 'untitled',
            ...(description && { summary: description }),
            ...(publishedAt && { published_at: publishedAt }),
            ...(modifiedAt && { modified_at: modifiedAt }),
            ...(author && { author }),
            ...(url && { canonical_url: url }),
        });
    }
    else if (title) {
        entities.push({
            id: `document:${slug(title)}`,
            type: 'document',
            title,
            ...(description && { summary: description }),
            ...(url && { canonical_url: url }),
        });
    }
    return { source: 'opengraph', entities, actions: [] };
}
|
|
60
|
+
/**
 * Index every `<meta>` tag that has a `content` attribute.
 *
 * The key is the tag's `property` attribute, or `name` when `property` is
 * absent, lower-cased. When a key repeats, the last tag wins.
 *
 * @param html HTML markup to scan.
 * @returns Map from lower-cased meta key to its `content` value.
 */
function readMeta(html) {
    const found = new Map();
    const tagPattern = /<meta\s+([^>]+?)\/?\s*>/gi;
    for (let hit = tagPattern.exec(html); hit !== null; hit = tagPattern.exec(html)) {
        const { property, name, content } = parseAttrs(hit[1]);
        const key = property ?? name;
        if (!key || content === undefined) {
            continue;
        }
        found.set(key.toLowerCase(), content);
    }
    return found;
}
|
|
73
|
+
/**
 * Parse the attribute text of a tag into a name -> value map.
 *
 * Attribute names may contain `:` and `-`, and are lower-cased. Values may
 * be double-quoted, single-quoted or bare; attributes without `=` are
 * skipped, and a repeated name keeps its last value.
 *
 * @param s Raw attribute text of a single tag.
 * @returns Plain object mapping lower-cased attribute names to their values.
 */
function parseAttrs(s) {
    const collected = {};
    const pairPattern = /(\w[\w:-]*)\s*=\s*("([^"]*)"|'([^']*)'|(\S+))/g;
    for (let pair = pairPattern.exec(s); pair !== null; pair = pairPattern.exec(s)) {
        const key = pair[1].toLowerCase();
        // Exactly one of the three capture groups is defined per match.
        const value = pair[3] ?? pair[4] ?? pair[5] ?? '';
        collected[key] = value;
    }
    return collected;
}
|
|
82
|
+
/**
 * Derive an id-safe slug from free text.
 *
 * Steps: lower-case, replace each run of characters outside `a-z0-9` with a
 * dash, strip dashes at both ends, keep the first 64 characters. An empty
 * result becomes `'unknown'`.
 *
 * @param s Text to slugify.
 * @returns The slug, or `'unknown'`.
 */
function slug(s) {
    const lowered = s.toLowerCase();
    const dashed = lowered.replace(/[^a-z0-9]+/g, '-');
    const stripped = dashed.replace(/^-+|-+$/g, '');
    const limited = stripped.slice(0, 64);
    if (limited) {
        return limited;
    }
    return 'unknown';
}
|
|
89
|
+
//# sourceMappingURL=opengraph.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"opengraph.js","sourceRoot":"","sources":["../../src/extractors/opengraph.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,UAAU,oBAAoB,CAAC,IAAY;IAC/C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC;IAC5B,IAAI,CAAC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAE1E,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;IAChD,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,eAAe,CAAC,IAAI,EAAE,CAAC;IACtE,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,qBAAqB,CAAC,IAAI,EAAE,CAAC;IACxF,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;IACrC,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,eAAe,CAAC,CAAC;IAEhE,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,sBAAsB,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,iBAAiB,CAAC,CAAC;QAC9E,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,wBAAwB,CAAC,IAAI,IAAI,CAAC,GAAG,CAAC,mBAAmB,CAAC,IAAI,KAAK,CAAC;QAC9F,MAAM,EAAE,GAAG,WAAW,IAAI,CAAC,KAAK,IAAI,GAAG,CAAC,EAAE,CAAC;QAC3C,MAAM,CAAC,GAAY,EAAE,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,IAAI,SAAS,EAAE,CAAC;QACrE,IAAI,WAAW;YAAE,CAAC,CAAC,WAAW,GAAG,WAAW,CAAC;QAC7C,IAAI,KAAK;YAAE,CAAC,CAAC,KAAK,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,EAAE,QAAQ,EAAE,CAAC;QACzD,IAAI,KAAK;YAAE,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;QACvC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;SAAM,IAAI,MAAM,KAAK,SAAS,IAAI,MAAM,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;QAChE,MAAM,CAAC,GAAa;YAClB,EAAE,EAAE,YAAY,IAAI,CAAC,KAAK,IAAI,GAAG,CAAC,EAAE;YACpC,IAAI,EAAE,UAAU;YAChB,KAAK,EAAE,KAAK,IAAI,UAAU;SAC3B,CAAC;QACF,IAAI,WAAW;YAAE,CAAC,CAAC,OAAO,GAAG,WAAW,CAAC;QACzC,IAAI,IAAI,CAAC,GAAG,CAAC,wBAAwB,CAAC;YAAE,CAAC,CAAC,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,wBAAwB,CAAC,CAAC;QAC5F,IAAI,IAAI,CAAC,GAAG,CAAC,uBAAuB,CAAC;YAAE,CAAC,CAAC,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QACzF,IAAI,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC;YAAE,CAAC,CAAC,MAAM,GAAG,IAAI,C
AAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;QACtE,IAAI,GAAG;YAAE,CAAC,CAAC,aAAa,GAAG,GAAG,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;SAAM,IAAI,KAAK,EAAE,CAAC;QACjB,MAAM,CAAC,GAAa;YAClB,EAAE,EAAE,YAAY,IAAI,CAAC,KAAK,CAAC,EAAE;YAC7B,IAAI,EAAE,UAAU;YAChB,KAAK;SACN,CAAC;QACF,IAAI,WAAW;YAAE,CAAC,CAAC,OAAO,GAAG,WAAW,CAAC;QACzC,IAAI,GAAG;YAAE,CAAC,CAAC,aAAa,GAAG,GAAG,CAAC;QAC/B,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACnB,CAAC;IAED,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;AACxD,CAAC;AAED,SAAS,QAAQ,CAAC,IAAY;IAC5B,MAAM,GAAG,GAAG,IAAI,GAAG,EAAkB,CAAC;IACtC,MAAM,EAAE,GAAG,2BAA2B,CAAC;IACvC,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpC,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,KAAK,CAAC,QAAQ,IAAI,KAAK,CAAC,IAAI,CAAC;QACzC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC;QAC5B,IAAI,GAAG,IAAI,KAAK,KAAK,SAAS;YAAE,GAAG,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,EAAE,EAAE,KAAK,CAAC,CAAC;IACpE,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,SAAS,UAAU,CAAC,CAAS;IAC3B,MAAM,CAAC,GAA2B,EAAE,CAAC;IACrC,MAAM,EAAE,GAAG,gDAAgD,CAAC;IAC5D,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACjC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,WAAW,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,IAAI,CAAC,CAAS;IACrB,OAAO,CAAC;SACL,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EAAE,GAAG,CAAC;SAC3B,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;SACvB,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,SAAS,CAAC;AAC/B,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract entities from inline schema.org JSON-LD blocks.
|
|
3
|
+
*
|
|
4
|
+
* If a site already publishes JSON-LD (most e-commerce, news, recipes do),
|
|
5
|
+
* we get a free Level-0 AHTML snapshot with zero developer work.
|
|
6
|
+
*/
|
|
7
|
+
import type { Extraction } from './merge.js';
|
|
8
|
+
/**
 * Extract product and article entities from the page's
 * `<script type="application/ld+json">` blocks. Blocks that fail to parse
 * as JSON are skipped.
 *
 * @param html - HTML markup to scan.
 * @returns An extraction whose `source` is `'schema-org'`; `actions` is always empty.
 */
export declare function extractFromSchemaOrg(html: string): Extraction;
|
|
9
|
+
//# sourceMappingURL=schema-org.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema-org.d.ts","sourceRoot":"","sources":["../../src/extractors/schema-org.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAGH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,YAAY,CAAC;AAE7C,wBAAgB,oBAAoB,CAAC,IAAI,EAAE,MAAM,GAAG,UAAU,CAY7D"}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Extract entities from inline schema.org JSON-LD blocks.
|
|
3
|
+
*
|
|
4
|
+
* If a site already publishes JSON-LD (most e-commerce, news, recipes do),
|
|
5
|
+
* we get a free Level-0 AHTML snapshot with zero developer work.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Collect entities from every inline JSON-LD block in an HTML string.
 *
 * Each `<script type="application/ld+json">` body is parsed as JSON and
 * walked for known schema.org types. A block that fails to parse (or to be
 * walked) is skipped on purpose so one bad block cannot hide the others.
 *
 * @param html HTML markup to scan.
 * @returns `{ source: 'schema-org', entities, actions: [] }`.
 */
export function extractFromSchemaOrg(html) {
    const entities = [];
    const jsonLdPattern = /<script[^>]*type=["']application\/ld\+json["'][^>]*>([\s\S]*?)<\/script>/gi;
    matchAll(html, jsonLdPattern).forEach((payload) => {
        try {
            visit(JSON.parse(payload), entities);
        }
        catch {
            // Best effort: ignore malformed blocks.
        }
    });
    return { source: 'schema-org', entities, actions: [] };
}
|
|
21
|
+
/**
 * Recursively walk a parsed JSON-LD value and append every recognised
 * schema.org entity to `out`.
 *
 * Recognised types:
 *  - `Product`, `IndividualProduct`, `ProductModel` -> product entity
 *  - `Article`, `BlogPosting`, `NewsArticle`        -> document entity
 *
 * `@type` may be a single string or an array of strings (JSON-LD allows
 * both); a node matches when any of its types is recognised. For `offers`
 * and `author`, which may be arrays, the first entry is used. A price is
 * only emitted when its amount is a finite number.
 *
 * After handling a node, all of its property values are visited too, so
 * entities nested inside e.g. `@graph` or `mainEntity` are found.
 *
 * @param node Any parsed JSON value.
 * @param out  Array that receives the extracted entities (mutated).
 */
function visit(node, out) {
    if (!node || typeof node !== 'object')
        return;
    if (Array.isArray(node)) {
        for (const n of node)
            visit(n, out);
        return;
    }
    const obj = node;
    const rawType = obj['@type'];
    const types = Array.isArray(rawType) ? rawType : [rawType];
    const isType = (...names) => types.some((t) => names.includes(t));
    if (isType('Product', 'IndividualProduct', 'ProductModel')) {
        const name = String(obj.name ?? '');
        let price;
        let stock;
        const offers = obj.offers;
        if (offers) {
            const offer = Array.isArray(offers) ? offers[0] : offers;
            if (offer) {
                if (offer.price !== undefined && offer.priceCurrency) {
                    const amount = Number(offer.price);
                    // Skip prices like "1,999" that do not convert cleanly.
                    if (Number.isFinite(amount))
                        price = { amount, currency: String(offer.priceCurrency) };
                }
                // availability is usually a URL such as https://schema.org/InStock.
                const avail = String(offer.availability ?? '').toLowerCase();
                if (avail.includes('instock'))
                    stock = { status: 'in_stock' };
                else if (avail.includes('outofstock'))
                    stock = { status: 'out_of_stock' };
                else if (avail.includes('preorder'))
                    stock = { status: 'preorder' };
            }
        }
        let rating;
        const aggregate = obj.aggregateRating;
        if (aggregate) {
            rating = {
                average: Number(aggregate.ratingValue ?? 0),
                count: Number(aggregate.reviewCount ?? aggregate.ratingCount ?? 0),
            };
        }
        out.push({
            id: `product:${slug(name)}`,
            type: 'product',
            name,
            ...(typeof obj.brand === 'object' && obj.brand && { brand: String(obj.brand.name ?? '') }),
            ...(typeof obj.description === 'string' && { description: obj.description }),
            ...(typeof obj.sku === 'string' && { sku: obj.sku }),
            ...(price && { price }),
            ...(stock && { stock }),
            ...(rating && { rating }),
        });
    }
    else if (isType('Article', 'BlogPosting', 'NewsArticle')) {
        const title = String(obj.headline ?? obj.name ?? '');
        // With several authors, use the first one.
        const author = Array.isArray(obj.author) ? obj.author[0] : obj.author;
        let authorName;
        if (typeof author === 'object' && author)
            authorName = String(author.name ?? '');
        else if (typeof author === 'string')
            authorName = author;
        out.push({
            id: `document:${slug(title)}`,
            type: 'document',
            title,
            ...(typeof obj.datePublished === 'string' && { published_at: obj.datePublished }),
            ...(typeof obj.dateModified === 'string' && { modified_at: obj.dateModified }),
            ...(typeof obj.description === 'string' && { summary: obj.description }),
            ...(typeof obj.articleBody === 'string' && { content: obj.articleBody }),
            ...(typeof obj.inLanguage === 'string' && { language: obj.inLanguage }),
            ...(authorName !== undefined && { author: authorName }),
        });
    }
    // Recurse into nested arrays/objects so deeply nested entities are caught.
    for (const v of Object.values(obj))
        visit(v, out);
}
|
|
90
|
+
/**
 * Collects the first capture group of every match of `re` in `s`.
 *
 * The scan runs on a global clone of `re`, so:
 * - a pattern without the `g` flag still yields every match instead of
 *   looping forever on the first one;
 * - zero-length matches advance correctly instead of stalling;
 * - the caller's regex (including its `lastIndex`) is left untouched and the
 *   scan always starts at the beginning of `s`.
 *
 * @param {string} s Text to search.
 * @param {RegExp} re Pattern whose capture group 1 is collected.
 * @returns {Array<string|undefined>} Group-1 captures in match order; an
 *   entry is `undefined` when the group did not participate in that match.
 */
function matchAll(s, re) {
    const flags = re.flags.includes('g') ? re.flags : `${re.flags}g`;
    const scanner = new RegExp(re, flags);
    return Array.from(s.matchAll(scanner), (m) => m[1]);
}
|
|
97
|
+
/**
 * Turns arbitrary text into a lowercase, hyphen-separated identifier of at
 * most 64 characters.
 *
 * Every run of characters outside `a-z0-9` collapses to a single `-`.
 * Leading hyphens are stripped before truncation and trailing hyphens after
 * it, so cutting at 64 characters can never leave a dangling `-`.
 *
 * @param {string} s Text to slugify.
 * @returns {string} The slug, or `'unknown'` when nothing usable remains.
 */
function slug(s) {
    const normalized = s
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, '-')
        .replace(/^-+/, '')
        .slice(0, 64)
        // Trim after slicing: the cut may land right behind a separator.
        .replace(/-+$/, '');
    return normalized || 'unknown';
}
|
|
104
|
+
//# sourceMappingURL=schema-org.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema-org.js","sourceRoot":"","sources":["../../src/extractors/schema-org.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,MAAM,UAAU,oBAAoB,CAAC,IAAY;IAC/C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,QAAQ,CAAC,IAAI,EAAE,4EAA4E,CAAC,CAAC;IAC5G,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QAC3B,IAAI,CAAC;YACH,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAC/B,KAAK,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;QACxB,CAAC;QAAC,MAAM,CAAC;YACP,wBAAwB;QAC1B,CAAC;IACH,CAAC;IACD,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,QAAQ,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;AACzD,CAAC;AAED,SAAS,KAAK,CAAC,IAAa,EAAE,GAAa;IACzC,IAAI,CAAC,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QAAE,OAAO;IAC9C,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;QACxB,KAAK,MAAM,CAAC,IAAI,IAAI;YAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;QACpC,OAAO;IACT,CAAC;IACD,MAAM,GAAG,GAAG,IAA+B,CAAC;IAC5C,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,CAAC,CAAC;IAE1B,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,mBAAmB,IAAI,IAAI,KAAK,cAAc,EAAE,CAAC;QAClF,MAAM,IAAI,GAAG,MAAM,CAAC,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QACpC,MAAM,EAAE,GAAG,WAAW,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QACnC,MAAM,CAAC,GAAY;YACjB,EAAE;YACF,IAAI,EAAE,SAAS;YACf,IAAI;YACJ,GAAG,CAAC,OAAO,GAAG,CAAC,KAAK,KAAK,QAAQ,IAAI,GAAG,CAAC,KAAK,IAAI,EAAE,KAAK,EAAE,MAAM,CAAE,GAAG,CAAC,KAAiC,CAAC,IAAI,IAAI,EAAE,CAAC,EAAE,CAAC;YACvH,GAAG,CAAC,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,IAAI,EAAE,WAAW,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC;YAC5E,GAAG,CAAC,OAAO,GAAG,CAAC,GAAG,KAAK,QAAQ,IAAI,EAAE,GAAG,EAAE,GAAG,CAAC,GAAG,EAAE,CAAC;SACrD,CAAC;QACF,MAAM,MAAM,GAAG,GAAG,CAAC,MAA6C,CAAC;QACjE,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;YACzD,IAAI,KAAK,EAAE,CAAC;gBACV,IAAI,KAAK,CAAC,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,aAAa,EAAE,CAAC;oBACrD,CAAC,CAAC,KAAK,GAAG,EAAE,MAAM,EAAE,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,QAAQ,EAAE,MAAM,CAAC,KAAK,CAAC,aAAa,CAAC,EAAE,CAAC;gBACnF,CAAC;gBACD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;gBAC7D,IAAI,KAAK,CAAC,QAAQ,CAAC,SAAS,CAAC;oBAAE
,CAAC,CAAC,KAAK,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;qBAC3D,IAAI,KAAK,CAAC,QAAQ,CAAC,YAAY,CAAC;oBAAE,CAAC,CAAC,KAAK,GAAG,EAAE,MAAM,EAAE,cAAc,EAAE,CAAC;qBACvE,IAAI,KAAK,CAAC,QAAQ,CAAC,UAAU,CAAC;oBAAE,CAAC,CAAC,KAAK,GAAG,EAAE,MAAM,EAAE,UAAU,EAAE,CAAC;YACxE,CAAC;QACH,CAAC;QACD,MAAM,MAAM,GAAG,GAAG,CAAC,eAAsD,CAAC;QAC1E,IAAI,MAAM,EAAE,CAAC;YACX,CAAC,CAAC,MAAM,GAAG;gBACT,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,WAAW,IAAI,CAAC,CAAC;gBACxC,KAAK,EAAE,MAAM,CAAC,MAAM,CAAC,WAAW,IAAI,MAAM,CAAC,WAAW,IAAI,CAAC,CAAC;aAC7D,CAAC;QACJ,CAAC;QACD,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACd,CAAC;SAAM,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,aAAa,IAAI,IAAI,KAAK,aAAa,EAAE,CAAC;QAClF,MAAM,KAAK,GAAG,MAAM,CAAC,GAAG,CAAC,QAAQ,IAAI,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;QACrD,MAAM,CAAC,GAAa;YAClB,EAAE,EAAE,YAAY,IAAI,CAAC,KAAK,CAAC,EAAE;YAC7B,IAAI,EAAE,UAAU;YAChB,KAAK;YACL,GAAG,CAAC,OAAO,GAAG,CAAC,aAAa,KAAK,QAAQ,IAAI,EAAE,YAAY,EAAE,GAAG,CAAC,aAAa,EAAE,CAAC;YACjF,GAAG,CAAC,OAAO,GAAG,CAAC,YAAY,KAAK,QAAQ,IAAI,EAAE,WAAW,EAAE,GAAG,CAAC,YAAY,EAAE,CAAC;YAC9E,GAAG,CAAC,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,IAAI,EAAE,OAAO,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC;YACxE,GAAG,CAAC,OAAO,GAAG,CAAC,WAAW,KAAK,QAAQ,IAAI,EAAE,OAAO,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC;YACxE,GAAG,CAAC,OAAO,GAAG,CAAC,UAAU,KAAK,QAAQ,IAAI,EAAE,QAAQ,EAAE,GAAG,CAAC,UAAU,EAAE,CAAC;SACxE,CAAC;QACF,MAAM,MAAM,GAAG,GAAG,CAAC,MAAM,CAAC;QAC1B,IAAI,OAAO,MAAM,KAAK,QAAQ,IAAI,MAAM;YAAE,CAAC,CAAC,MAAM,GAAG,MAAM,CAAE,MAAkC,CAAC,IAAI,IAAI,EAAE,CAAC,CAAC;aACvG,IAAI,OAAO,MAAM,KAAK,QAAQ;YAAE,CAAC,CAAC,MAAM,GAAG,MAAM,CAAC;QACvD,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACd,CAAC;IAED,+DAA+D;IAC/D,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,MAAM,CAAC,GAAG,CAAC;QAAE,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;AACpD,CAAC;AAED,SAAS,QAAQ,CAAC,CAAS,EAAE,EAAU;IACrC,MAAM,CAAC,GAAa,EAAE,CAAC;IACvB,IAAI,CAAyB,CAAC;IAC9B,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI;QAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAE,CAAC,CAAC;IAChD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,IAAI,CAAC,CAAS;IACrB,OAAO,CAAC;SACL,WAAW,EAAE;SACb,OAAO,CAAC,aAAa,EA
AE,GAAG,CAAC;SAC3B,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;SACvB,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,SAAS,CAAC;AAC/B,CAAC"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-route snapshot handler factory.
|
|
3
|
+
*
|
|
4
|
+
* Usage in a Next.js App Router project:
|
|
5
|
+
*
|
|
6
|
+
* // app/ahtml/[...path]/route.ts
|
|
7
|
+
* import { createAHTMLRoute } from '@ahtmljs/next/handler';
|
|
8
|
+
* import { buildSnapshotForPath } from '../../lib/ahtml';
|
|
9
|
+
* export const { GET, HEAD } = createAHTMLRoute(buildSnapshotForPath);
|
|
10
|
+
*
|
|
11
|
+
* The handler supports:
|
|
12
|
+
* - Content negotiation:
|
|
13
|
+
* Accept: application/ahtml+json → canonical JSON
|
|
14
|
+
* Accept: application/ahtml+text → token-optimal compact text (default)
|
|
15
|
+
* - Conditional GET via If-None-Match → 304
|
|
16
|
+
* - Diff endpoint via ?since=<etag> → SnapshotDiff
|
|
17
|
+
* - ETag, Cache-Control, Last-Modified headers
|
|
18
|
+
* - Optional policy enforcement (rate limit / auth gate)
|
|
19
|
+
*/
|
|
20
|
+
import { type Snapshot } from '@ahtmljs/schema';
|
|
21
|
+
import { type AHTMLConfig } from './index.js';
|
|
22
|
+
/**
 * Signature of the callback handed to `createAHTMLRoute` as `builder`.
 *
 * It receives the route's path segments and the incoming `Request`, and
 * yields a `Snapshot` or `null`, either directly or through a promise.
 *
 * NOTE(review): `null` presumably means "no snapshot for this path"; how the
 * handler responds to it is not visible in this declaration file — confirm
 * in handler.js.
 */
export type SnapshotBuilder = (pathSegments: string[], req: Request) => Promise<Snapshot | null> | Snapshot | null;
|
|
23
|
+
/**
 * Accepts a snapshot store exposing `get(key)` (returns a `Snapshot` or
 * `undefined`) and `set(key, s)`.
 *
 * NOTE(review): presumably installs `impl` as the cache the route handlers
 * read from and write to; the implementation is not part of this
 * declaration file — confirm in handler.js.
 */
export declare function setSnapshotCache(impl: {
|
|
24
|
+
get(key: string): Snapshot | undefined;
|
|
25
|
+
set(key: string, s: Snapshot): void;
|
|
26
|
+
}): void;
|
|
27
|
+
/**
 * Builds the `GET` and `HEAD` route handlers for a catch-all route, e.g.
 * `export const { GET, HEAD } = createAHTMLRoute(buildSnapshotForPath);`.
 *
 * Each handler takes the incoming `Request` plus a context whose `params`
 * is either a plain object or a promise of one, carrying the optional
 * `path` segment array, and resolves to a `Response`.
 *
 * @param builder Callback matching `SnapshotBuilder`.
 * @param configOverride Optional `AHTMLConfig`. NOTE(review): presumably
 *   replaces or overrides the default configuration — confirm in handler.js.
 * @returns An object with `GET` and `HEAD` handlers of identical signature.
 */
export declare function createAHTMLRoute(builder: SnapshotBuilder, configOverride?: AHTMLConfig): {
|
|
28
|
+
GET: (req: Request, ctx: {
|
|
29
|
+
params: Promise<{
|
|
30
|
+
path?: string[];
|
|
31
|
+
}> | {
|
|
32
|
+
path?: string[];
|
|
33
|
+
};
|
|
34
|
+
}) => Promise<Response>;
|
|
35
|
+
HEAD: (req: Request, ctx: {
|
|
36
|
+
params: Promise<{
|
|
37
|
+
path?: string[];
|
|
38
|
+
}> | {
|
|
39
|
+
path?: string[];
|
|
40
|
+
};
|
|
41
|
+
}) => Promise<Response>;
|
|
42
|
+
};
|
|
43
|
+
//# sourceMappingURL=handler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"handler.d.ts","sourceRoot":"","sources":["../src/handler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;GAkBG;AAEH,OAAO,EAKL,KAAK,QAAQ,EACd,MAAM,iBAAiB,CAAC;AACzB,OAAO,EAAa,KAAK,WAAW,EAAE,MAAM,YAAY,CAAC;AAGzD,MAAM,MAAM,eAAe,GAAG,CAC5B,YAAY,EAAE,MAAM,EAAE,EACtB,GAAG,EAAE,OAAO,KACT,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,GAAG,QAAQ,GAAG,IAAI,CAAC;AAQhD,wBAAgB,gBAAgB,CAAC,IAAI,EAAE;IAAE,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,QAAQ,GAAG,SAAS,CAAC;IAAC,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,CAAC,EAAE,QAAQ,GAAG,IAAI,CAAA;CAAE,GAAG,IAAI,CAG5H;AAWD,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,eAAe,EAAE,cAAc,CAAC,EAAE,WAAW;eAC7D,OAAO,OAAO;QAAE,MAAM,EAAE,OAAO,CAAC;YAAE,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;SAAE,CAAC,GAAG;YAAE,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;SAAE,CAAA;KAAE,KAAG,OAAO,CAAC,QAAQ,CAAC;gBAwE/F,OAAO,OAAO;QAAE,MAAM,EAAE,OAAO,CAAC;YAAE,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;SAAE,CAAC,GAAG;YAAE,IAAI,CAAC,EAAE,MAAM,EAAE,CAAA;SAAE,CAAA;KAAE,KAAG,OAAO,CAAC,QAAQ,CAAC;EAM1H"}
|