@zenalexa/unicli 0.221.0 → 0.221.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. package/AGENTS.md +12 -12
  2. package/README.md +13 -11
  3. package/README.zh-CN.md +13 -11
  4. package/dist/adapters/acl-anthology/papers.d.ts +16 -0
  5. package/dist/adapters/acl-anthology/papers.d.ts.map +1 -0
  6. package/dist/adapters/acl-anthology/papers.js +135 -0
  7. package/dist/adapters/acl-anthology/papers.js.map +1 -0
  8. package/dist/adapters/arxiv/papers.js +2 -0
  9. package/dist/adapters/arxiv/papers.js.map +1 -1
  10. package/dist/adapters/baidu-scholar/search.js +5 -0
  11. package/dist/adapters/baidu-scholar/search.js.map +1 -1
  12. package/dist/adapters/crossref/works.d.ts +42 -0
  13. package/dist/adapters/crossref/works.d.ts.map +1 -0
  14. package/dist/adapters/crossref/works.js +157 -0
  15. package/dist/adapters/crossref/works.js.map +1 -0
  16. package/dist/adapters/cvf/papers.d.ts +17 -0
  17. package/dist/adapters/cvf/papers.d.ts.map +1 -0
  18. package/dist/adapters/cvf/papers.js +124 -0
  19. package/dist/adapters/cvf/papers.js.map +1 -0
  20. package/dist/adapters/dblp/publications.js +4 -0
  21. package/dist/adapters/dblp/publications.js.map +1 -1
  22. package/dist/adapters/google-scholar/cite.js +1 -0
  23. package/dist/adapters/google-scholar/cite.js.map +1 -1
  24. package/dist/adapters/google-scholar/profile.js +5 -0
  25. package/dist/adapters/google-scholar/profile.js.map +1 -1
  26. package/dist/adapters/google-scholar/search.js +5 -0
  27. package/dist/adapters/google-scholar/search.js.map +1 -1
  28. package/dist/adapters/hf/paper.js +1 -0
  29. package/dist/adapters/hf/paper.js.map +1 -1
  30. package/dist/adapters/neurips/proceedings.d.ts +17 -0
  31. package/dist/adapters/neurips/proceedings.d.ts.map +1 -0
  32. package/dist/adapters/neurips/proceedings.js +112 -0
  33. package/dist/adapters/neurips/proceedings.js.map +1 -0
  34. package/dist/adapters/openalex/works.d.ts.map +1 -1
  35. package/dist/adapters/openalex/works.js +32 -0
  36. package/dist/adapters/openalex/works.js.map +1 -1
  37. package/dist/adapters/openreview/papers.js +5 -0
  38. package/dist/adapters/openreview/papers.js.map +1 -1
  39. package/dist/adapters/pmlr/proceedings.d.ts +35 -0
  40. package/dist/adapters/pmlr/proceedings.d.ts.map +1 -0
  41. package/dist/adapters/pmlr/proceedings.js +139 -0
  42. package/dist/adapters/pmlr/proceedings.js.map +1 -0
  43. package/dist/adapters/pubmed/articles.js +5 -0
  44. package/dist/adapters/pubmed/articles.js.map +1 -1
  45. package/dist/adapters/semantic-scholar/papers.d.ts +36 -0
  46. package/dist/adapters/semantic-scholar/papers.d.ts.map +1 -0
  47. package/dist/adapters/semantic-scholar/papers.js +214 -0
  48. package/dist/adapters/semantic-scholar/papers.js.map +1 -0
  49. package/dist/adapters/unpaywall/works.d.ts +33 -0
  50. package/dist/adapters/unpaywall/works.d.ts.map +1 -0
  51. package/dist/adapters/unpaywall/works.js +101 -0
  52. package/dist/adapters/unpaywall/works.js.map +1 -0
  53. package/dist/cli.d.ts.map +1 -1
  54. package/dist/cli.js +15 -1
  55. package/dist/cli.js.map +1 -1
  56. package/dist/commands/do.d.ts +30 -0
  57. package/dist/commands/do.d.ts.map +1 -0
  58. package/dist/commands/do.js +248 -0
  59. package/dist/commands/do.js.map +1 -0
  60. package/dist/commands/extract.d.ts +34 -0
  61. package/dist/commands/extract.d.ts.map +1 -0
  62. package/dist/commands/extract.js +316 -0
  63. package/dist/commands/extract.js.map +1 -0
  64. package/dist/commands/scholar.d.ts +33 -0
  65. package/dist/commands/scholar.d.ts.map +1 -0
  66. package/dist/commands/scholar.js +494 -0
  67. package/dist/commands/scholar.js.map +1 -0
  68. package/dist/commands/search.d.ts.map +1 -1
  69. package/dist/commands/search.js +2 -5
  70. package/dist/commands/search.js.map +1 -1
  71. package/dist/discovery/aliases.d.ts +2 -2
  72. package/dist/discovery/aliases.d.ts.map +1 -1
  73. package/dist/discovery/aliases.js +182 -11
  74. package/dist/discovery/aliases.js.map +1 -1
  75. package/dist/discovery/intents.d.ts +10 -0
  76. package/dist/discovery/intents.d.ts.map +1 -0
  77. package/dist/discovery/intents.js +255 -0
  78. package/dist/discovery/intents.js.map +1 -0
  79. package/dist/discovery/search.d.ts +4 -1
  80. package/dist/discovery/search.d.ts.map +1 -1
  81. package/dist/discovery/search.js +28 -140
  82. package/dist/discovery/search.js.map +1 -1
  83. package/dist/fast-path/handlers/discovery.d.ts.map +1 -1
  84. package/dist/fast-path/handlers/discovery.js +17 -3
  85. package/dist/fast-path/handlers/discovery.js.map +1 -1
  86. package/dist/manifest-compact.txt +13 -11
  87. package/dist/manifest-search.json +1 -1
  88. package/dist/manifest.json +462 -68
  89. package/dist/mcp/handler.d.ts.map +1 -1
  90. package/dist/mcp/handler.js +14 -2
  91. package/dist/mcp/handler.js.map +1 -1
  92. package/dist/mcp/tools.d.ts.map +1 -1
  93. package/dist/mcp/tools.js +11 -3
  94. package/dist/mcp/tools.js.map +1 -1
  95. package/dist/registry.d.ts +1 -0
  96. package/dist/registry.d.ts.map +1 -1
  97. package/dist/registry.js +5 -0
  98. package/dist/registry.js.map +1 -1
  99. package/dist/types/scholarly.d.ts +49 -0
  100. package/dist/types/scholarly.d.ts.map +1 -0
  101. package/dist/types/scholarly.js +16 -0
  102. package/dist/types/scholarly.js.map +1 -0
  103. package/package.json +1 -1
  104. package/server.json +2 -2
  105. package/skills/unicli/SKILL.md +1 -1
  106. package/skills/unicli-claude-code/SKILL.md +1 -1
  107. package/skills/unicli-hermes/SKILL.md +1 -1
  108. package/src/adapters/acl-anthology/papers.ts +157 -0
  109. package/src/adapters/arxiv/download.yaml +1 -1
  110. package/src/adapters/arxiv/paper.yaml +1 -1
  111. package/src/adapters/arxiv/papers.ts +2 -0
  112. package/src/adapters/arxiv/search.yaml +1 -1
  113. package/src/adapters/arxiv/trending.yaml +1 -1
  114. package/src/adapters/baidu-scholar/search.ts +5 -0
  115. package/src/adapters/crossref/works.ts +209 -0
  116. package/src/adapters/cvf/papers.ts +136 -0
  117. package/src/adapters/dblp/publications.ts +4 -0
  118. package/src/adapters/google-scholar/cite.ts +1 -0
  119. package/src/adapters/google-scholar/profile.ts +5 -0
  120. package/src/adapters/google-scholar/search.ts +5 -0
  121. package/src/adapters/hf/paper.test.ts +10 -0
  122. package/src/adapters/hf/paper.ts +1 -0
  123. package/src/adapters/hf/top.yaml +1 -1
  124. package/src/adapters/huggingface-papers/daily.yaml +1 -1
  125. package/src/adapters/huggingface-papers/search.yaml +1 -1
  126. package/src/adapters/neurips/proceedings.ts +126 -0
  127. package/src/adapters/openalex/works.ts +33 -0
  128. package/src/adapters/openreview/papers.ts +5 -0
  129. package/src/adapters/pmlr/proceedings.ts +167 -0
  130. package/src/adapters/pubmed/articles.ts +5 -0
  131. package/src/adapters/semantic-scholar/papers.ts +268 -0
  132. package/src/adapters/unpaywall/works.ts +138 -0
  133. package/src/adapters/zotero/search.yaml +1 -1
@@ -0,0 +1,30 @@
1
+ /**
2
+ * @owner src/commands/do.ts
3
+ * @does One-call intent → ranked plan. Natural-language input maps to
4
+ * the best-fitting adapter command, and the envelope's
5
+ * next_actions field carries an executable, schema-aware
6
+ * invocation template. Intentionally plan-only — agents
7
+ * explicitly invoke the suggested command on the second hop
8
+ * (mirrors REST HATEOAS; avoids ambiguous-intent triggering
9
+ * irreversible adapter writes).
10
+ * @needs commander, src/discovery/search, src/registry,
11
+ * src/commands/describe (describeCommand), src/output/{envelope,formatter}
12
+ * @feeds src/cli.ts agent entrypoint; complements `unicli search`
13
+ * (set semantics) with action semantics ("give me the answer").
14
+ * @breaks Emits `empty_result` (exit 66) when the intent is blank or no
15
+ * adapter scores above the floor, and `invalid_input` (exit 2)
16
+ * for a bad --top. Otherwise success; no network calls or writes.
17
+ * @invariants Never auto-executes the matched command. Output envelope's
18
+ * next_actions[0] is the recommended invocation; the agent
19
+ * must call it explicitly.
20
+ * @side-effects None — local index lookup only.
21
+ * @perf O(N) over BM25 index already loaded; <10ms cold per
22
+ * discovery/search.ts header.
23
+ * @concurrency Pure; no shared state.
24
+ * @test tests/unit/commands/do.test.ts
25
+ * @stability experimental
26
+ * @since 2026-05-18
27
+ */
28
+ import { Command } from "commander";
29
+ export declare function registerDoCommand(program: Command): void;
30
+ //# sourceMappingURL=do.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"do.d.ts","sourceRoot":"","sources":["../../src/commands/do.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AA8BpC,wBAAgB,iBAAiB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CA2GxD"}
@@ -0,0 +1,248 @@
1
+ /**
2
+ * @owner src/commands/do.ts
3
+ * @does One-call intent → ranked plan. Natural-language input maps to
4
+ * the best-fitting adapter command, and the envelope's
5
+ * next_actions field carries an executable, schema-aware
6
+ * invocation template. Intentionally plan-only — agents
7
+ * explicitly invoke the suggested command on the second hop
8
+ * (mirrors REST HATEOAS; avoids ambiguous-intent triggering
9
+ * irreversible adapter writes).
10
+ * @needs commander, src/discovery/search, src/registry,
11
+ * src/commands/describe (describeCommand), src/output/{envelope,formatter}
12
+ * @feeds src/cli.ts agent entrypoint; complements `unicli search`
13
+ * (set semantics) with action semantics ("give me the answer").
14
+ * @breaks Emits `empty_result` (exit 66) when the intent is blank or no
15
+ * adapter scores above the floor, and `invalid_input` (exit 2)
16
+ * for a bad --top. Otherwise success; no network calls or writes.
17
+ * @invariants Never auto-executes the matched command. Output envelope's
18
+ * next_actions[0] is the recommended invocation; the agent
19
+ * must call it explicitly.
20
+ * @side-effects None — local index lookup only.
21
+ * @perf O(N) over BM25 index already loaded; <10ms cold per
22
+ * discovery/search.ts header.
23
+ * @concurrency Pure; no shared state.
24
+ * @test tests/unit/commands/do.test.ts
25
+ * @stability experimental
26
+ * @since 2026-05-18
27
+ */
28
+ import { search } from "../discovery/search.js";
29
+ import { getAdapter, resolveCommand } from "../registry.js";
30
+ import { describeCommand } from "./describe.js";
31
+ import { format, detectFormat } from "../output/formatter.js";
32
const DEFAULT_TOP = 3;
const SCORE_FLOOR = 0.0;

/**
 * Register the plan-only `do <intent...>` subcommand on the CLI program.
 *
 * The action joins the intent words, validates `--top`, ranks adapter
 * commands through `search`, and prints a single envelope. The envelope's
 * `next_actions` describe how to invoke the best match; this function never
 * runs the matched command itself.
 *
 * @param program Commander program to attach the subcommand to. Its global
 *   `format` option is passed to `detectFormat` to pick the output format.
 * @returns {void}
 */
export function registerDoCommand(program) {
  /**
   * Turn one search hit into a candidate payload. When `withSchema` is set
   * and the command resolves, the described `args_schema` / `example_stdin`
   * are attached (only when defined).
   */
  const toCandidate = (hit, withSchema) => {
    const candidate = {
      site: hit.site,
      command: hit.command,
      score: round(hit.score, 4),
      description: hit.description,
      category: hit.category,
    };
    if (!withSchema) {
      return candidate;
    }
    const resolved = resolveCommand(hit.site, hit.command);
    if (!resolved) {
      return candidate;
    }
    const adapter = getAdapter(hit.site);
    const described = describeCommand(
      hit.site,
      hit.command,
      resolved.command,
      adapter,
    );
    const schema = described.args_schema;
    const stdinExample = described.example_stdin;
    if (schema !== undefined) {
      candidate.args_schema = schema;
    }
    if (stdinExample !== undefined) {
      candidate.example_stdin = stdinExample;
    }
    return candidate;
  };

  /** Commander action: validate input, rank, and print the plan envelope. */
  const runDo = (intentParts, opts) => {
    const startedAt = Date.now();
    const fmt = detectFormat(program.opts().format);
    const intent = intentParts.join(" ").trim();

    let top;
    try {
      top = parseTop(opts.top);
    } catch (err) {
      const message =
        err instanceof Error ? err.message : "invalid --top value";
      emitInvalidInput(
        startedAt,
        fmt,
        intent,
        message,
        `Pass a positive integer up to ${TOP_HARD_LIMIT}`,
      );
      return;
    }

    // commander sets `schema` to false only when --no-schema was passed.
    const includeSchema = opts.schema !== false;

    if (intent.length === 0) {
      emitEmpty(startedAt, fmt, intent, "missing intent argument");
      return;
    }

    const scored = search(intent, top).filter(
      (hit) => hit.score > SCORE_FLOOR,
    );
    if (scored.length === 0) {
      emitEmpty(startedAt, fmt, intent, "no adapter scored above the floor");
      return;
    }

    const matches = scored.map((hit) => toCandidate(hit, includeSchema));
    const best = matches[0];

    let match = null;
    if (best) {
      match = {
        site: best.site,
        command: best.command,
        score: best.score,
        category: best.category,
        description: best.description,
        invocation: `unicli ${best.site} ${best.command}`,
      };
    }
    const data = { intent, match, candidates: matches };

    const ctx = {
      command: "core.do",
      duration_ms: Date.now() - startedAt,
      surface: "web",
      next_actions: best ? successNextActions(intent, best, matches) : [],
    };
    console.log(format(data, undefined, fmt, ctx));
  };

  program
    .command("do <intent...>")
    .description(
      "Route a natural-language intent to the best-fit adapter command (plan-only; agent executes the suggested next_action)",
    )
    .option(
      "-n, --top <n>",
      `Return top-N candidates (default ${DEFAULT_TOP})`,
      String(DEFAULT_TOP),
    )
    .option(
      "--no-schema",
      "Omit args_schema and example_stdin from each match payload",
    )
    .action(runDo);
}
110
+ // ── helpers ──────────────────────────────────────────────────────────────────
111
const TOP_HARD_LIMIT = 25;

/**
 * Validate the raw `--top` option and return it as a number.
 *
 * The value is coerced with `Number()`, so it must come out as an integer in
 * the range 1..TOP_HARD_LIMIT. Anything else throws; the caller turns the
 * throw into an `invalid_input` envelope instead of silently falling back to
 * a default (project rule 02).
 *
 * @param raw Option value as supplied on the command line.
 * @returns {number} The validated candidate count.
 * @throws {Error} When the value is not a positive integer, or exceeds
 *   TOP_HARD_LIMIT.
 */
function parseTop(raw) {
  const parsed = Number(raw);
  // Number.isInteger is false for NaN and ±Infinity, so no separate
  // finiteness test is needed.
  const isPositiveInteger = Number.isInteger(parsed) && parsed > 0;
  if (!isPositiveInteger) {
    throw new Error(`--top must be a positive integer (got "${raw}")`);
  }
  if (parsed <= TOP_HARD_LIMIT) {
    return parsed;
  }
  throw new Error(`--top ${parsed} exceeds hard limit ${TOP_HARD_LIMIT}`);
}
127
/**
 * Round `n` to `digits` decimal places.
 *
 * Scales by a power of ten, applies `Math.round`, then scales back, so the
 * result carries the usual binary floating-point caveats.
 *
 * @param {number} n Value to round.
 * @param {number} digits Number of decimal places to keep.
 * @returns {number} The rounded value.
 */
function round(n, digits) {
  const scale = Math.pow(10, digits);
  const scaled = Math.round(n * scale);
  return scaled / scale;
}
131
/**
 * Build the ordered `next_actions` list for a successful match.
 *
 * Order: invoke the top match, describe it, the stdin-JSON variant, an
 * optional runner-up (only when its score is at least 70% of the top score),
 * and finally a `unicli search` over the original intent.
 *
 * The intent is free-form user text that ends up inside a double-quoted
 * shell argument, so the characters that remain special inside POSIX double
 * quotes (backslash, double quote, dollar sign, backtick) are
 * backslash-escaped. Intents without those characters produce exactly the
 * same command string as before.
 *
 * @param {string} intent Natural-language intent as typed by the caller.
 * @param best Top-ranked candidate (reads `site`, `command`, `score`,
 *   `args_schema`).
 * @param {Array} matches All ranked candidates; index 1 is the runner-up.
 * @returns {Array<object>} Action descriptors with `command`, `description`
 *   and, for the primary action, optional `params`.
 */
function successNextActions(intent, best, matches) {
  const invocation = `unicli ${best.site} ${best.command}`;
  const params = argsSchemaToParams(best.args_schema);
  // "\\$&" inserts a backslash in front of each matched character.
  const quotedIntent = String(intent).replace(/[\\"$`]/g, "\\$&");

  const actions = [
    // Primary: invoke the top match
    {
      command: invocation,
      description: `Invoke the top-scored match (${best.score})`,
      ...(params ? { params } : {}),
    },
    // Schema introspection
    {
      command: `unicli describe ${best.site} ${best.command}`,
      description: "Read the full schema, channels, and example payload",
    },
    // Stdin-JSON channel for payloads with quoting hazards
    {
      command: `echo '{}' | ${invocation}`,
      description:
        "Stdin-JSON channel — use when params contain quotes/emoji/JSON",
    },
  ];

  // Surface a runner-up if it scored close to the top
  const runner = matches[1];
  if (runner && runner.score >= best.score * 0.7) {
    actions.push({
      command: `unicli ${runner.site} ${runner.command}`,
      description: `Runner-up (score ${runner.score}) — consider if top match misreads intent`,
    });
  }

  // Broaden if the agent wants the full ranked list
  actions.push({
    command: `unicli search "${quotedIntent}"`,
    description: "List all matching candidates with scores",
  });
  return actions;
}
167
/**
 * Walk a JSON-schema args object and produce per-parameter hints so the
 * agent can fill the invocation template without re-reading docs.
 *
 * For every entry in `schema.properties` the hint may carry:
 *   - `description` when it is a string,
 *   - `enum` when it is an array made only of strings/numbers,
 *   - `default` when it is a string, number, or boolean.
 * Properties that yield none of these are left out.
 *
 * Malformed input is tolerated: a `null` or non-object schema, a missing or
 * non-object `properties`, and `null` or non-object property entries are
 * skipped instead of raising a TypeError.
 *
 * @param schema Described args schema; may be undefined, a string, or an
 *   object with a `properties` map.
 * @returns {object|undefined} Map of parameter name to hint, or `undefined`
 *   when nothing useful could be extracted.
 */
function argsSchemaToParams(schema) {
  if (schema === null || typeof schema !== "object") {
    return undefined;
  }
  const properties = schema.properties;
  if (properties === null || typeof properties !== "object") {
    return undefined;
  }

  const isEnumMember = (value) =>
    typeof value === "string" || typeof value === "number";
  const defaultTypes = ["string", "number", "boolean"];

  const params = {};
  for (const [name, prop] of Object.entries(properties)) {
    // Entries such as `null` or boolean schemas carry no hint fields.
    if (prop === null || typeof prop !== "object") {
      continue;
    }
    const hint = {};
    if (typeof prop.description === "string") {
      hint.description = prop.description;
    }
    if (Array.isArray(prop.enum) && prop.enum.every(isEnumMember)) {
      hint.enum = prop.enum;
    }
    if (defaultTypes.includes(typeof prop.default)) {
      hint.default = prop.default;
    }
    if (Object.keys(hint).length > 0) {
      params[name] = hint;
    }
  }
  return Object.keys(params).length > 0 ? params : undefined;
}
202
/**
 * Print an `empty_result` error envelope and set the process exit code to 66.
 *
 * The first suggested action re-runs the intent through `unicli search`.
 * Because the intent is free-form text placed inside a double-quoted shell
 * argument, backslash, double quote, dollar sign and backtick are
 * backslash-escaped; a blank intent falls back to the `<query>` placeholder.
 *
 * @param {number} startedAt Epoch milliseconds when the command started.
 * @param fmt Output format passed through to `format`.
 * @param {string} intent Intent text as typed by the caller (may be empty).
 * @param {string} reason Short explanation appended to the error message.
 * @returns {void}
 */
function emitEmpty(startedAt, fmt, intent, reason) {
  // "\\$&" inserts a backslash in front of each matched character.
  const query = intent
    ? String(intent).replace(/[\\"$`]/g, "\\$&")
    : "<query>";
  const ctx = {
    command: "core.do",
    duration_ms: Date.now() - startedAt,
    surface: "web",
    next_actions: [
      {
        command: `unicli search "${query}"`,
        description: "Broaden the query — list all candidates with scores",
      },
      {
        command: `unicli describe`,
        description: "Browse the site index for the right vertical",
      },
    ],
    error: {
      code: "empty_result",
      message: `No adapter matched intent: ${reason}`,
      retryable: false,
      suggestion:
        "Use simpler keywords or run `unicli describe` to see the full catalogue",
    },
  };
  process.exitCode = 66;
  console.log(format(null, undefined, fmt, ctx));
}
227
/**
 * Print an `invalid_input` error envelope and set the process exit code to 2.
 *
 * The single suggested action re-runs the intent through `unicli search`.
 * Because the intent is free-form text placed inside a double-quoted shell
 * argument, backslash, double quote, dollar sign and backtick are
 * backslash-escaped; a blank intent falls back to the `<query>` placeholder.
 *
 * @param {number} startedAt Epoch milliseconds when the command started.
 * @param fmt Output format passed through to `format`.
 * @param {string} intent Intent text as typed by the caller (may be empty).
 * @param {string} message Error message describing the rejected input.
 * @param {string} suggestion Hint on how to correct the input.
 * @returns {void}
 */
function emitInvalidInput(startedAt, fmt, intent, message, suggestion) {
  // "\\$&" inserts a backslash in front of each matched character.
  const query = intent
    ? String(intent).replace(/[\\"$`]/g, "\\$&")
    : "<query>";
  const ctx = {
    command: "core.do",
    duration_ms: Date.now() - startedAt,
    surface: "web",
    next_actions: [
      {
        command: `unicli search "${query}"`,
        description: "List all candidates without --top constraints",
      },
    ],
    error: {
      code: "invalid_input",
      message,
      retryable: false,
      suggestion,
    },
  };
  process.exitCode = 2;
  console.log(format(null, undefined, fmt, ctx));
}
248
+ //# sourceMappingURL=do.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"do.js","sourceRoot":"","sources":["../../src/commands/do.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AAGH,OAAO,EAAE,MAAM,EAAE,MAAM,wBAAwB,CAAC;AAChD,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAC5D,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAQ9D,MAAM,WAAW,GAAG,CAAC,CAAC;AACtB,MAAM,WAAW,GAAG,GAAG,CAAC;AAiBxB,MAAM,UAAU,iBAAiB,CAAC,OAAgB;IAChD,OAAO;SACJ,OAAO,CAAC,gBAAgB,CAAC;SACzB,WAAW,CACV,uHAAuH,CACxH;SACA,MAAM,CACL,eAAe,EACf,oCAAoC,WAAW,GAAG,EAClD,MAAM,CAAC,WAAW,CAAC,CACpB;SACA,MAAM,CACL,aAAa,EACb,4DAA4D,CAC7D;SACA,MAAM,CAAC,CAAC,WAAqB,EAAE,IAAY,EAAE,EAAE;QAC9C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,YAAY,CACtB,OAAO,CAAC,IAAI,EAAE,CAAC,MAAkC,CAClD,CAAC;QACF,MAAM,MAAM,GAAG,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAE5C,IAAI,GAAW,CAAC;QAChB,IAAI,CAAC;YACH,GAAG,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC3B,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,gBAAgB,CACd,SAAS,EACT,GAAG,EACH,MAAM,EACN,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,qBAAqB,EACtD,iCAAiC,cAAc,EAAE,CAClD,CAAC;YACF,OAAO;QACT,CAAC;QACD,MAAM,aAAa,GAAG,IAAI,CAAC,MAAM,KAAK,KAAK,CAAC;QAE5C,IAAI,CAAC,MAAM,EAAE,CAAC;YACZ,SAAS,CAAC,SAAS,EAAE,GAAG,EAAE,MAAM,EAAE,yBAAyB,CAAC,CAAC;YAC7D,OAAO;QACT,CAAC;QAED,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QACpC,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,WAAW,CAAC,CAAC;QAE9D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC1B,SAAS,CAAC,SAAS,EAAE,GAAG,EAAE,MAAM,EAAE,mCAAmC,CAAC,CAAC;YACvE,OAAO;QACT,CAAC;QAED,MAAM,OAAO,GAAmB,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YACjD,MAAM,CAAC,GAAiB;gBACtB,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,OAAO,EAAE,CAAC,CAAC,OAAO;gBAClB,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC;gBACxB,WAAW,EAAE,CAAC,CAAC,WAAW;gBAC1B,QAAQ,EAAE,CAAC,CAAC,QAAQ;aACrB,CAAC;YACF,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,QAAQ,GAAG,cAAc,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC;gBACnD,IAAI,QAAQ,EAAE,CAAC;oBACb,M
AAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBACnC,MAAM,IAAI,GAAG,eAAe,CAC1B,CAAC,CAAC,IAAI,EACN,CAAC,CAAC,OAAO,EACT,QAAQ,CAAC,OAAO,EAChB,OAAO,CACR,CAAC;oBACF,MAAM,UAAU,GAAG,IAAI,CAAC,WAGX,CAAC;oBACd,MAAM,OAAO,GAAG,IAAI,CAAC,aAGR,CAAC;oBACd,IAAI,UAAU,KAAK,SAAS;wBAAE,CAAC,CAAC,WAAW,GAAG,UAAU,CAAC;oBACzD,IAAI,OAAO,KAAK,SAAS;wBAAE,CAAC,CAAC,aAAa,GAAG,OAAO,CAAC;gBACvD,CAAC;YACH,CAAC;YACD,OAAO,CAAC,CAAC;QACX,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QACxB,MAAM,IAAI,GAA4B;YACpC,MAAM;YACN,KAAK,EAAE,IAAI;gBACT,CAAC,CAAC;oBACE,IAAI,EAAE,IAAI,CAAC,IAAI;oBACf,OAAO,EAAE,IAAI,CAAC,OAAO;oBACrB,KAAK,EAAE,IAAI,CAAC,KAAK;oBACjB,QAAQ,EAAE,IAAI,CAAC,QAAQ;oBACvB,WAAW,EAAE,IAAI,CAAC,WAAW;oBAC7B,UAAU,EAAE,UAAU,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,EAAE;iBAClD;gBACH,CAAC,CAAC,IAAI;YACR,UAAU,EAAE,OAAO;SACpB,CAAC;QAEF,MAAM,GAAG,GAAiB;YACxB,OAAO,EAAE,SAAS;YAClB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;YACnC,OAAO,EAAE,KAAK;YACd,YAAY,EAAE,IAAI,CAAC,CAAC,CAAC,kBAAkB,CAAC,MAAM,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE;SACpE,CAAC;QACF,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;AACP,CAAC;AAED,gFAAgF;AAEhF,MAAM,cAAc,GAAG,EAAE,CAAC;AAE1B;;;;GAIG;AACH,SAAS,QAAQ,CAAC,GAAW;IAC3B,MAAM,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IACtB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CAAC,0CAA0C,GAAG,IAAI,CAAC,CAAC;IACrE,CAAC;IACD,IAAI,CAAC,GAAG,cAAc,EAAE,CAAC;QACvB,MAAM,IAAI,KAAK,CAAC,SAAS,CAAC,uBAAuB,cAAc,EAAE,CAAC,CAAC;IACrE,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,KAAK,CAAC,CAAS,EAAE,MAAc;IACtC,MAAM,CAAC,GAAG,EAAE,IAAI,MAAM,CAAC;IACvB,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;AAC/B,CAAC;AAED,SAAS,kBAAkB,CACzB,MAAc,EACd,IAAkB,EAClB,OAAuB;IAEvB,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,gCAAgC;IAChC,MAAM,MAAM,GAAG,kBAAkB,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACpD,OAAO,CAAC,IAAI,CAAC;QACX,OAAO,EAAE,UAAU,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,EAAE;QAC9C,W
AAW,EAAE,gCAAgC,IAAI,CAAC,KAAK,GAAG;QAC1D,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KAC9B,CAAC,CAAC;IAEH,uBAAuB;IACvB,OAAO,CAAC,IAAI,CAAC;QACX,OAAO,EAAE,mBAAmB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,EAAE;QACvD,WAAW,EAAE,qDAAqD;KACnE,CAAC,CAAC;IAEH,uDAAuD;IACvD,OAAO,CAAC,IAAI,CAAC;QACX,OAAO,EAAE,sBAAsB,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,EAAE;QAC1D,WAAW,EACT,gEAAgE;KACnE,CAAC,CAAC;IAEH,oDAAoD;IACpD,IACE,OAAO,CAAC,MAAM,GAAG,CAAC;QAClB,OAAO,CAAC,CAAC,CAAC;QACV,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,KAAK,GAAG,GAAG,EACpC,CAAC;QACD,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;QAC1B,OAAO,CAAC,IAAI,CAAC;YACX,OAAO,EAAE,UAAU,MAAM,CAAC,IAAI,IAAI,MAAM,CAAC,OAAO,EAAE;YAClD,WAAW,EAAE,oBAAoB,MAAM,CAAC,KAAK,2CAA2C;SACzF,CAAC,CAAC;IACL,CAAC;IAED,kDAAkD;IAClD,OAAO,CAAC,IAAI,CAAC;QACX,OAAO,EAAE,kBAAkB,MAAM,GAAG;QACpC,WAAW,EAAE,0CAA0C;KACxD,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;GAIG;AACH,SAAS,kBAAkB,CACzB,MAAoD;IAEpD,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,SAAS,CAAC;IAC3C,IAAI,OAAO,MAAM,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAC;IACjD,MAAM,UAAU,GACb,MAAM,CAAC,UAEM,IAAI,SAAS,CAAC;IAC9B,IAAI,CAAC,UAAU;QAAE,OAAO,SAAS,CAAC;IAClC,MAAM,GAAG,GAAyC,EAAE,CAAC;IACrD,KAAK,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;QACtD,MAAM,KAAK,GAAyB,EAAE,CAAC;QACvC,IAAI,OAAO,IAAI,CAAC,WAAW,KAAK,QAAQ,EAAE,CAAC;YACzC,KAAK,CAAC,WAAW,GAAG,IAAI,CAAC,WAAW,CAAC;QACvC,CAAC;QACD,IACE,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;YACxB,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,QAAQ,IAAI,OAAO,CAAC,KAAK,QAAQ,CAAC,EACtE,CAAC;YACD,KAAK,CAAC,IAAI,GAAG,IAAI,CAAC,IAA8B,CAAC;QACnD,CAAC;QACD,IACE,IAAI,CAAC,OAAO,KAAK,SAAS;YAC1B,CAAC,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;gBAC/B,OAAO,IAAI,CAAC,OAAO,KAAK,QAAQ;gBAChC,OAAO,IAAI,CAAC,OAAO,KAAK,SAAS,CAAC,EACpC,CAAC;YACD,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC;QAC/B,CAAC;QACD,IAAI,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAClC,GAAG,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;QACpB,CAAC;IACH,CAAC;IACD,OAAO,MAAM,CAAC,IAAI,CAAC,
GAAG,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;AACvD,CAAC;AAED,SAAS,SAAS,CAChB,SAAiB,EACjB,GAAiB,EACjB,MAAc,EACd,MAAc;IAEd,MAAM,GAAG,GAAiB;QACxB,OAAO,EAAE,SAAS;QAClB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;QACnC,OAAO,EAAE,KAAK;QACd,YAAY,EAAE;YACZ;gBACE,OAAO,EAAE,kBAAkB,MAAM,IAAI,SAAS,GAAG;gBACjD,WAAW,EAAE,qDAAqD;aACnE;YACD;gBACE,OAAO,EAAE,iBAAiB;gBAC1B,WAAW,EAAE,8CAA8C;aAC5D;SACF;QACD,KAAK,EAAE;YACL,IAAI,EAAE,cAAc;YACpB,OAAO,EAAE,8BAA8B,MAAM,EAAE;YAC/C,SAAS,EAAE,KAAK;YAChB,UAAU,EACR,yEAAyE;SAC5E;KACF,CAAC;IACF,OAAO,CAAC,QAAQ,GAAG,EAAE,CAAC;IACtB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACjD,CAAC;AAED,SAAS,gBAAgB,CACvB,SAAiB,EACjB,GAAiB,EACjB,MAAc,EACd,OAAe,EACf,UAAkB;IAElB,MAAM,GAAG,GAAiB;QACxB,OAAO,EAAE,SAAS;QAClB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;QACnC,OAAO,EAAE,KAAK;QACd,YAAY,EAAE;YACZ;gBACE,OAAO,EAAE,kBAAkB,MAAM,IAAI,SAAS,GAAG;gBACjD,WAAW,EAAE,+CAA+C;aAC7D;SACF;QACD,KAAK,EAAE;YACL,IAAI,EAAE,eAAe;YACrB,OAAO;YACP,SAAS,EAAE,KAAK;YAChB,UAAU;SACX;KACF,CAAC;IACF,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACrB,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACjD,CAAC"}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * @owner src/commands/extract.ts
3
+ * @does One-shot URL → cleaned Markdown/text/HTML extraction without
4
+ * adapter awareness. Stateless agent verb: fetch + render in a
5
+ * single CLI call, no browser session, no auth, no pipeline
6
+ * composition required.
7
+ * @needs turndown, commander, src/engine/ssrf, src/engine/proxy,
8
+ * src/output/{envelope,formatter}, src/constants
9
+ * @feeds src/cli.ts agent entrypoint; agents that want "fetch this URL
10
+ * as Markdown" without composing a fetch_text + html_to_md
11
+ * pipeline themselves.
12
+ * @breaks Emits structured envelopes. Codes:
13
+ * invalid_input — non-http(s) URL, SSRF block, bad --max-chars
14
+ * not_found — 404
15
+ * auth_required — 401/403
16
+ * rate_limited — 429
17
+ * api_error — other 4xx
18
+ * upstream_error — 5xx, or Content-Length above HARD_MAX_BYTES
19
+ * network_error — DNS/TCP/TLS failure or timeout
20
+ * Each error envelope carries next_actions with a retry hint,
21
+ * a `describe` link, and a `do` link.
22
+ * @invariants Truncates rendered content at --max-chars; never holds more
23
+ * than HARD_MAX_BYTES of upstream body in memory.
24
+ * @side-effects HTTP GET to user-supplied URL with proxy if configured.
25
+ * No local filesystem writes.
26
+ * @perf O(N) in body bytes for Turndown; N capped by HARD_MAX_BYTES.
27
+ * @concurrency Pure async; no shared state.
28
+ * @test tests/unit/commands/extract.test.ts
29
+ * @stability experimental
30
+ * @since 2026-05-18
31
+ */
32
+ import { Command } from "commander";
33
+ export declare function registerExtractCommand(program: Command): void;
34
+ //# sourceMappingURL=extract.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/commands/extract.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAEH,OAAO,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAuBpC,wBAAgB,sBAAsB,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAiL7D"}
@@ -0,0 +1,316 @@
1
+ /**
2
+ * @owner src/commands/extract.ts
3
+ * @does One-shot URL → cleaned Markdown/text/HTML extraction without
4
+ * adapter awareness. Stateless agent verb: fetch + render in a
5
+ * single CLI call, no browser session, no auth, no pipeline
6
+ * composition required.
7
+ * @needs turndown, commander, src/engine/ssrf, src/engine/proxy,
8
+ * src/output/{envelope,formatter}, src/constants
9
+ * @feeds src/cli.ts agent entrypoint; agents that want "fetch this URL
10
+ * as Markdown" without composing a fetch_text + html_to_md
11
+ * pipeline themselves.
12
+ * @breaks Emits structured envelopes. Codes:
13
+ * invalid_input — non-http(s) URL, SSRF block, bad --max-chars
14
+ * not_found — 404
15
+ * auth_required — 401/403
16
+ * rate_limited — 429
17
+ * api_error — other 4xx
18
+ * upstream_error — 5xx, or Content-Length above HARD_MAX_BYTES
19
+ * network_error — DNS/TCP/TLS failure or timeout
20
+ * Each error envelope carries next_actions with a retry hint,
21
+ * a `describe` link, and a `do` link.
22
+ * @invariants Truncates rendered content at --max-chars; never holds more
23
+ * than HARD_MAX_BYTES of upstream body in memory.
24
+ * @side-effects HTTP GET to user-supplied URL with proxy if configured.
25
+ * No local filesystem writes.
26
+ * @perf O(N) in body bytes for Turndown; N capped by HARD_MAX_BYTES.
27
+ * @concurrency Pure async; no shared state.
28
+ * @test tests/unit/commands/extract.test.ts
29
+ * @stability experimental
30
+ * @since 2026-05-18
31
+ */
32
+ import TurndownService from "turndown";
33
+ import { assertSafeRequestUrl } from "../engine/ssrf.js";
34
+ import { getProxyAgent } from "../engine/proxy.js";
35
+ import { USER_AGENT } from "../constants.js";
36
+ import { format, detectFormat } from "../output/formatter.js";
37
const DEFAULT_MAX_CHARS = 50_000;
const HARD_MAX_BYTES = 5_000_000;

/**
 * Read at most `maxBytes` bytes of a fetch Response body and decode them as
 * UTF-8.
 *
 * The body is consumed chunk by chunk and the stream is cancelled as soon as
 * the cap is reached, so an upstream that omits or misreports Content-Length
 * cannot make the process buffer an unbounded payload. A cut that lands in
 * the middle of a multi-byte sequence decodes to U+FFFD at the end.
 *
 * @param {Response} resp Response whose body has not been consumed yet.
 * @param {number} maxBytes Upper bound on the bytes read from the body.
 * @returns {Promise<string>} Decoded text ("" when the response has no body).
 */
async function readBodyCapped(resp, maxBytes) {
  if (!resp.body) {
    return "";
  }
  const reader = resp.body.getReader();
  const decoder = new TextDecoder();
  let received = 0;
  let text = "";
  try {
    while (received < maxBytes) {
      const { done, value } = await reader.read();
      if (done) {
        break;
      }
      const room = maxBytes - received;
      const chunk = value.byteLength > room ? value.subarray(0, room) : value;
      received += chunk.byteLength;
      text += decoder.decode(chunk, { stream: true });
    }
    // Flush any bytes the decoder is still holding.
    text += decoder.decode();
  } finally {
    // Best-effort release of the connection; the text already read stands
    // even if the cancel itself fails.
    await reader.cancel().catch(() => {});
  }
  return text;
}

/**
 * Register the `extract <url>` subcommand on the CLI program.
 *
 * The action validates `--max-chars`, runs the URL through
 * `assertSafeRequestUrl`, performs a single GET (through the proxy agent when
 * one is configured), renders the body as markdown / text / html, truncates
 * it to `--max-chars`, and prints one envelope. Every failure path prints an
 * error envelope via `emitError` and returns without throwing.
 *
 * @param program Commander program to attach the subcommand to. Its global
 *   `format` option is passed to `detectFormat` to pick the output format.
 * @returns {void}
 */
export function registerExtractCommand(program) {
  program
    .command("extract <url>")
    .description(
      "Fetch a URL and return cleaned Markdown (one-shot, no browser/auth)",
    )
    .option(
      "--max-chars <n>",
      `Truncate rendered content at N chars (default ${DEFAULT_MAX_CHARS})`,
      String(DEFAULT_MAX_CHARS),
    )
    .option(
      "--as <format>",
      "Render content as markdown|text|html (default markdown)",
      "markdown",
    )
    .action(async (url, opts) => {
      const startedAt = Date.now();
      const fmt = detectFormat(program.opts().format);

      let maxChars;
      try {
        maxChars = parseMaxChars(opts.maxChars);
      } catch (e) {
        emitError(
          baseCtx(startedAt),
          {
            code: "invalid_input",
            message:
              e instanceof Error ? e.message : "invalid --max-chars value",
            suggestion: `Pass a positive integer up to ${MAX_CHARS_HARD_LIMIT}`,
            retryable: false,
          },
          fmt,
          url,
        );
        return;
      }
      const renderAs = parseExtractFormat(opts.as);

      try {
        assertSafeRequestUrl(url);
      } catch (e) {
        emitError(
          baseCtx(startedAt),
          {
            code: "invalid_input",
            message:
              e instanceof Error ? e.message : "URL failed safety validation",
            suggestion:
              "Use an http(s) URL; loopback / link-local / private ranges are blocked",
            retryable: false,
          },
          fmt,
          url,
        );
        return;
      }

      let html;
      let httpStatus = 0;
      try {
        const init = {
          method: "GET",
          headers: { "User-Agent": USER_AGENT },
        };
        const agent = getProxyAgent();
        if (agent) {
          init.dispatcher = agent;
        }
        // NOTE(review): fetch follows redirects by default and only the
        // initial URL is validated above — confirm whether redirect targets
        // are also covered by the SSRF guard.
        const resp = await fetch(url, init);
        httpStatus = resp.status;
        if (!resp.ok) {
          emitError(
            baseCtx(startedAt),
            {
              code: mapStatus(resp.status),
              message: `HTTP ${resp.status} ${resp.statusText} from ${url}`,
              suggestion:
                resp.status >= 500
                  ? "Upstream 5xx — retry after a short delay"
                  : resp.status === 429
                    ? "Rate-limited — back off and retry"
                    : resp.status === 401 || resp.status === 403
                      ? "Authenticated endpoint — try `unicli auth setup <site>`"
                      : `Check that ${url} is the canonical URL`,
              retryable: resp.status >= 500 || resp.status === 429,
            },
            fmt,
            url,
          );
          return;
        }
        const lenHeader = resp.headers.get("content-length");
        if (lenHeader && Number(lenHeader) > HARD_MAX_BYTES) {
          emitError(
            baseCtx(startedAt),
            {
              // REASON: oversized upstream payload is an upstream property,
              // not caller error — surface as `upstream_error` so agent
              // retry policy knows the URL itself cannot be re-fetched
              // smaller. retryable=false because shrink-on-retry is unlikely.
              code: "upstream_error",
              message: `Content-Length ${lenHeader} exceeds hard cap ${HARD_MAX_BYTES}`,
              suggestion:
                "Target a smaller URL or use a streaming adapter via `unicli search`",
              retryable: false,
            },
            fmt,
            url,
          );
          return;
        }
        // Content-Length can be absent (chunked) or wrong, so the cap is
        // enforced while reading rather than after buffering the whole body.
        html = await readBodyCapped(resp, HARD_MAX_BYTES);
      } catch (e) {
        emitError(
          baseCtx(startedAt),
          {
            code: "network_error",
            message: e instanceof Error ? e.message : String(e),
            suggestion: `Network fetch failed for ${url} — verify connectivity`,
            retryable: true,
          },
          fmt,
          url,
        );
        return;
      }

      let content;
      if (renderAs === "markdown") {
        const turndown = new TurndownService({
          headingStyle: "atx",
          codeBlockStyle: "fenced",
        });
        content = turndown.turndown(html);
      } else if (renderAs === "text") {
        content = stripTags(html);
      } else {
        content = html;
      }

      // Lengths are measured on the rendered content, before truncation.
      const originalLength = content.length;
      const truncated = originalLength > maxChars;
      if (truncated) {
        content = content.slice(0, maxChars);
      }

      const ctx = {
        command: "core.extract",
        duration_ms: Date.now() - startedAt,
        surface: "web",
        next_actions: successNextActions(
          url,
          renderAs,
          truncated,
          originalLength,
        ),
      };
      const data = {
        url,
        format: renderAs,
        http_status: httpStatus,
        length: content.length,
        original_length: originalLength,
        truncated,
        content,
      };
      console.log(format(data, undefined, fmt, ctx));
    });
}
165
+ // ── helpers ──────────────────────────────────────────────────────────────────
166
/**
 * Build the envelope context used for `core.extract` error responses.
 *
 * @param {number} startedAt - Timestamp (same units as `Date.now()`) taken
 *   when the command started.
 * @returns {{command: string, duration_ms: number, surface: string}} The
 *   command id, elapsed time since `startedAt`, and the surface tag.
 */
function baseCtx(startedAt) {
    const elapsedMs = Date.now() - startedAt;
    const context = { command: "core.extract" };
    context.duration_ms = elapsedMs;
    context.surface = "web";
    return context;
}
173
// Largest value `--max-chars` may take.
const MAX_CHARS_HARD_LIMIT = 1_000_000;
/**
 * Parse `--max-chars`. Throws on invalid input — the caller is responsible
 * for converting the throw into a structured `invalid_input` envelope. This
 * is the rule-02 contract: bad CLI input is a caller bug, not a system state
 * to silently recover from.
 *
 * @param {*} raw - Raw option value; coerced with `Number()`.
 * @returns {number} A positive integer no greater than MAX_CHARS_HARD_LIMIT.
 * @throws {Error} When the value is not a positive integer or exceeds the
 *   hard limit.
 */
function parseMaxChars(raw) {
    const parsed = Number(raw);
    // `Number.isInteger` already rejects NaN and ±Infinity.
    const isPositiveInteger = Number.isInteger(parsed) && parsed > 0;
    if (!isPositiveInteger) {
        throw new Error(`--max-chars must be a positive integer (got "${raw}")`);
    }
    if (parsed <= MAX_CHARS_HARD_LIMIT) {
        return parsed;
    }
    throw new Error(`--max-chars ${parsed} exceeds hard limit ${MAX_CHARS_HARD_LIMIT}`);
}
190
/**
 * Normalize the `--as` option into one of the three render modes.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. Any
 * unrecognized, empty, or missing value falls back to "markdown" instead of
 * throwing, so a missing option can never crash the command with a
 * `TypeError` before a structured envelope is emitted.
 *
 * @param {*} raw - Raw option value (normally a string).
 * @returns {"text"|"html"|"markdown"} The canonical render mode.
 */
function parseExtractFormat(raw) {
    // `?? ""` covers undefined/null; String() covers other non-string values.
    const key = String(raw ?? "").trim().toLowerCase();
    switch (key) {
        case "text":
        case "txt":
        case "plain":
            return "text";
        case "html":
        case "raw":
            return "html";
        default:
            return "markdown";
    }
}
198
/**
 * Translate a non-OK HTTP status into the envelope error code.
 *
 * @param {number} status - HTTP response status.
 * @returns {string} "not_found" for 404, "auth_required" for 401/403,
 *   "rate_limited" for 429, "upstream_error" for anything >= 500, and
 *   "api_error" for every other status.
 */
function mapStatus(status) {
    switch (status) {
        case 404:
            return "not_found";
        case 401:
        case 403:
            return "auth_required";
        case 429:
            return "rate_limited";
        default:
            return status >= 500 ? "upstream_error" : "api_error";
    }
}
209
// REASON: intentionally minimal HTML stripper for `--as text` mode. Strips
// scripts, styles, tags, and 5 common entities. Does NOT handle CDATA,
// HTML comments, numeric character references, or HTML5 `<template>` —
// agents post-process the output anyway. NOT a safe-HTML sanitizer.
const TEXT_MODE_ENTITIES = Object.freeze({
    nbsp: " ",
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
});
/**
 * Reduce an HTML document to whitespace-normalized plain text.
 *
 * @param {string} html - Raw HTML markup.
 * @returns {string} Text with script/style bodies and tags removed, the five
 *   supported entities decoded, and runs of whitespace collapsed to one space.
 */
function stripTags(html) {
    return html
        // End tags may carry whitespace/junk before `>` (e.g. `</script >`);
        // tolerate that so script/style bodies never leak into the text.
        .replace(/<script\b[\s\S]*?<\/script\b[^>]*>/gi, " ")
        .replace(/<style\b[\s\S]*?<\/style\b[^>]*>/gi, " ")
        .replace(/<[^>]+>/g, " ")
        // Decode all entities in ONE pass. Sequential replaces that handle
        // `&amp;` first would double-unescape `&amp;lt;` into `<`.
        .replace(/&(nbsp|amp|lt|gt|quot);/g, (_match, name) => TEXT_MODE_ENTITIES[name])
        .replace(/\s+/g, " ")
        .trim();
}
226
/**
 * Build the follow-up command suggestions attached to a successful extract.
 *
 * @param {string} url - The URL that was extracted.
 * @param {string} as - Render mode used for this run ("markdown", "text" or "html").
 * @param {boolean} truncated - Whether the rendered content was cut at `--max-chars`.
 * @param {number} originalLength - Length of the rendered content before truncation.
 * @returns {Array<{command: string, description: string, params?: object}>}
 *   Suggested next commands, in display order.
 */
function successNextActions(url, as, truncated, originalLength) {
    const actions = [];
    if (truncated) {
        // Clamp to the same ceiling `--max-chars` validation enforces so the
        // suggested command is never rejected as out of range.
        const fullCap = Math.min(originalLength, MAX_CHARS_HARD_LIMIT);
        actions.push({
            // `originalLength` was measured in the current render mode, so the
            // retry must pin `--as`; otherwise the cap is applied to a
            // different rendering and may truncate again.
            command: `unicli extract ${url} --as ${as} --max-chars ${fullCap}`,
            description: `Re-extract with larger limit (full rendered length ${originalLength})`,
            params: {
                "max-chars": {
                    value: fullCap,
                    description: "Truncation cap in characters",
                },
            },
        });
    }
    if (as !== "text") {
        actions.push({
            command: `unicli extract ${url} --as text`,
            description: "Re-extract as plain text (no Markdown formatting)",
        });
    }
    if (as !== "html") {
        actions.push({
            command: `unicli extract ${url} --as html`,
            description: "Re-extract as raw HTML (no cleaning)",
        });
    }
    actions.push({
        command: `unicli do "<natural-language intent>"`,
        description: "Route a natural-language intent to the best-matching adapter (e.g. structured site fetch instead of a raw URL)",
    });
    return actions;
}
259
/**
 * Build the follow-up command suggestions attached to a failed extract.
 *
 * The list always starts with a plain retry and ends with `unicli describe`;
 * an auth hint and/or an adapter hint are inserted between them depending on
 * the error code.
 *
 * @param {string} url - The URL whose extraction failed.
 * @param {string} errCode - Envelope error code (e.g. "auth_required").
 * @returns {Array<{command: string, description: string, params?: object}>}
 *   Suggested next commands, in display order.
 */
function errorNextActions(url, errCode) {
    const retryAction = {
        command: `unicli extract ${url}`,
        description: "Retry the same extraction",
    };
    const authAction = {
        command: `unicli auth setup <site>`,
        description: "Authenticate before retrying",
        params: {
            site: {
                description: "Short site name (e.g. `twitter`, `github`)",
            },
        },
    };
    const adapterAction = {
        command: `unicli do "<natural-language intent>"`,
        description: "Try a structured adapter instead of a raw URL fetch",
    };
    const describeAction = {
        command: `unicli describe`,
        description: "Inspect available commands and adapters",
    };
    // Codes for which a structured adapter is offered as an alternative.
    const adapterCodes = ["not_found", "api_error", "invalid_input"];
    return [
        retryAction,
        ...(errCode === "auth_required" ? [authAction] : []),
        ...(adapterCodes.includes(errCode) ? [adapterAction] : []),
        describeAction,
    ];
}
291
/**
 * Emit a structured error envelope and record the process exit code.
 *
 * Prints via `console.log` and sets `process.exitCode` rather than exiting,
 * so the caller still has to `return` afterwards.
 *
 * @param {object} baseCtxValue - Base context fields (see `baseCtx`).
 * @param {{code: string}} err - Error payload; `err.code` selects both the
 *   suggested next actions and the exit code.
 * @param {*} fmt - Output format selector forwarded to `format`.
 * @param {string} url - The URL the command was invoked with.
 * @returns {void}
 */
function emitError(baseCtxValue, err, fmt, url) {
    const next_actions = errorNextActions(url, err.code);
    const envelopeCtx = Object.assign({}, baseCtxValue, { next_actions, error: err });
    process.exitCode = mapExitCode(err.code);
    const rendered = format(null, undefined, fmt, envelopeCtx);
    console.log(rendered);
}
300
/**
 * Map an envelope error code to the process exit status.
 *
 * NOTE(review): 77 / 75 / 69 look like BSD sysexits values (EX_NOPERM,
 * EX_TEMPFAIL, EX_UNAVAILABLE) — confirm against the project's exit-code
 * table.
 *
 * @param {string} code - Envelope error code.
 * @returns {number} 77 for "auth_required"; 75 for "rate_limited" and
 *   "network_error"; 69 for "upstream_error"; 2 for "not_found" and
 *   "invalid_input"; 1 for anything else.
 */
function mapExitCode(code) {
    // A Map (not a plain object) so keys like "constructor" fall through to 1.
    const exitCodes = new Map([
        ["auth_required", 77],
        ["rate_limited", 75],
        ["network_error", 75],
        ["upstream_error", 69],
        ["not_found", 2],
        ["invalid_input", 2],
    ]);
    return exitCodes.has(code) ? exitCodes.get(code) : 1;
}
316
+ //# sourceMappingURL=extract.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/commands/extract.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA8BG;AAGH,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,oBAAoB,EAAE,MAAM,mBAAmB,CAAC;AACzD,OAAO,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC;AACnD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AAQ9D,MAAM,iBAAiB,GAAG,MAAM,CAAC;AACjC,MAAM,cAAc,GAAG,SAAS,CAAC;AASjC,MAAM,UAAU,sBAAsB,CAAC,OAAgB;IACrD,OAAO;SACJ,OAAO,CAAC,eAAe,CAAC;SACxB,WAAW,CACV,qEAAqE,CACtE;SACA,MAAM,CACL,iBAAiB,EACjB,iDAAiD,iBAAiB,GAAG,EACrE,MAAM,CAAC,iBAAiB,CAAC,CAC1B;SACA,MAAM,CACL,eAAe,EACf,yDAAyD,EACzD,UAAU,CACX;SACA,MAAM,CAAC,KAAK,EAAE,GAAW,EAAE,IAAiB,EAAE,EAAE;QAC/C,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;QAC7B,MAAM,GAAG,GAAG,YAAY,CACtB,OAAO,CAAC,IAAI,EAAE,CAAC,MAAkC,CAClD,CAAC;QAEF,IAAI,QAAgB,CAAC;QACrB,IAAI,CAAC;YACH,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC1C,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,SAAS,CACP,OAAO,CAAC,SAAS,CAAC,EAClB;gBACE,IAAI,EAAE,eAAe;gBACrB,OAAO,EACL,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,2BAA2B;gBAC9D,UAAU,EAAE,iCAAiC,oBAAoB,EAAE;gBACnE,SAAS,EAAE,KAAK;aACjB,EACD,GAAG,EACH,GAAG,CACJ,CAAC;YACF,OAAO;QACT,CAAC;QACD,MAAM,QAAQ,GAAG,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAE7C,IAAI,CAAC;YACH,oBAAoB,CAAC,GAAG,CAAC,CAAC;QAC5B,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,SAAS,CACP,OAAO,CAAC,SAAS,CAAC,EAClB;gBACE,IAAI,EAAE,eAAe;gBACrB,OAAO,EACL,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,8BAA8B;gBACjE,UAAU,EACR,wEAAwE;gBAC1E,SAAS,EAAE,KAAK;aACjB,EACD,GAAG,EACH,GAAG,CACJ,CAAC;YACF,OAAO;QACT,CAAC;QAED,IAAI,IAAY,CAAC;QACjB,IAAI,UAAU,GAAG,CAAC,CAAC;QACnB,IAAI,CAAC;YACH,MAAM,IAAI,GAA4B;gBACpC,MAAM,EAAE,KAAK;gBACb,OAAO,EAAE,EAAE,YAAY,EAAE,UAAU,EAAE;aACtC,CAAC;YACF,MAAM,KAAK,GAAG,aAAa,EAAE,CAAC;YAC9B,IAAI,KAAK;gBAAE,IAAI,CAAC,UAAU,GAAG,KAAK,CAAC;YAEnC,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE,IAAmB,CAAC,CAAC;YACnD,UAAU,GAAG,IAAI,CAAC,MAAM,CAAC;YAEzB,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;gBACb
,SAAS,CACP,OAAO,CAAC,SAAS,CAAC,EAClB;oBACE,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,MAAM,CAAC;oBAC5B,OAAO,EAAE,QAAQ,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU,SAAS,GAAG,EAAE;oBAC7D,UAAU,EACR,IAAI,CAAC,MAAM,IAAI,GAAG;wBAChB,CAAC,CAAC,0CAA0C;wBAC5C,CAAC,CAAC,IAAI,CAAC,MAAM,KAAK,GAAG;4BACnB,CAAC,CAAC,mCAAmC;4BACrC,CAAC,CAAC,IAAI,CAAC,MAAM,KAAK,GAAG,IAAI,IAAI,CAAC,MAAM,KAAK,GAAG;gCAC1C,CAAC,CAAC,yDAAyD;gCAC3D,CAAC,CAAC,cAAc,GAAG,uBAAuB;oBAClD,SAAS,EAAE,IAAI,CAAC,MAAM,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,KAAK,GAAG;iBACrD,EACD,GAAG,EACH,GAAG,CACJ,CAAC;gBACF,OAAO;YACT,CAAC;YAED,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAC;YACrD,IAAI,SAAS,IAAI,MAAM,CAAC,SAAS,CAAC,GAAG,cAAc,EAAE,CAAC;gBACpD,SAAS,CACP,OAAO,CAAC,SAAS,CAAC,EAClB;oBACE,8DAA8D;oBAC9D,8DAA8D;oBAC9D,+DAA+D;oBAC/D,gEAAgE;oBAChE,IAAI,EAAE,gBAAgB;oBACtB,OAAO,EAAE,kBAAkB,SAAS,qBAAqB,cAAc,EAAE;oBACzE,UAAU,EACR,qEAAqE;oBACvE,SAAS,EAAE,KAAK;iBACjB,EACD,GAAG,EACH,GAAG,CACJ,CAAC;gBACF,OAAO;YACT,CAAC;YAED,IAAI,GAAG,MAAM,IAAI,CAAC,IAAI,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,MAAM,GAAG,cAAc,EAAE,CAAC;gBACjC,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,cAAc,CAAC,CAAC;YACvC,CAAC;QACH,CAAC;QAAC,OAAO,CAAC,EAAE,CAAC;YACX,SAAS,CACP,OAAO,CAAC,SAAS,CAAC,EAClB;gBACE,IAAI,EAAE,eAAe;gBACrB,OAAO,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;gBACnD,UAAU,EAAE,4BAA4B,GAAG,wBAAwB;gBACnE,SAAS,EAAE,IAAI;aAChB,EACD,GAAG,EACH,GAAG,CACJ,CAAC;YACF,OAAO;QACT,CAAC;QAED,IAAI,OAAe,CAAC;QACpB,IAAI,QAAQ,KAAK,UAAU,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC;gBACnC,YAAY,EAAE,KAAK;gBACnB,cAAc,EAAE,QAAQ;aACzB,CAAC,CAAC;YACH,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACpC,CAAC;aAAM,IAAI,QAAQ,KAAK,MAAM,EAAE,CAAC;YAC/B,OAAO,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAC5B,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;QAED,MAAM,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC;QACtC,MAAM,SAAS,GAAG,cAAc,GAAG,QAAQ,CAAC;QAC5C,IAAI,SAAS;YAAE,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;QAEpD,MAAM,GAAG,GAAiB;YACxB,OAAO,EAAE,cAAc;YACvB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE
,GAAG,SAAS;YACnC,OAAO,EAAE,KAAK;YACd,YAAY,EAAE,kBAAkB,CAC9B,GAAG,EACH,QAAQ,EACR,SAAS,EACT,cAAc,CACf;SACF,CAAC;QAEF,MAAM,IAAI,GAA4B;YACpC,GAAG;YACH,MAAM,EAAE,QAAQ;YAChB,WAAW,EAAE,UAAU;YACvB,MAAM,EAAE,OAAO,CAAC,MAAM;YACtB,eAAe,EAAE,cAAc;YAC/B,SAAS;YACT,OAAO;SACR,CAAC;QAEF,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;AACP,CAAC;AAED,gFAAgF;AAEhF,SAAS,OAAO,CAAC,SAAiB;IAChC,OAAO;QACL,OAAO,EAAE,cAAc;QACvB,WAAW,EAAE,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS;QACnC,OAAO,EAAE,KAAK;KACf,CAAC;AACJ,CAAC;AAED;;;;;GAKG;AACH,MAAM,oBAAoB,GAAG,SAAS,CAAC;AACvC,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,CAAC,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC;IACtB,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAC1D,MAAM,IAAI,KAAK,CAAC,gDAAgD,GAAG,IAAI,CAAC,CAAC;IAC3E,CAAC;IACD,IAAI,CAAC,GAAG,oBAAoB,EAAE,CAAC;QAC7B,MAAM,IAAI,KAAK,CACb,eAAe,CAAC,uBAAuB,oBAAoB,EAAE,CAC9D,CAAC;IACJ,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,SAAS,kBAAkB,CAAC,GAAW;IACrC,MAAM,CAAC,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;IAC5B,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,KAAK,IAAI,CAAC,KAAK,OAAO;QAAE,OAAO,MAAM,CAAC;IAChE,IAAI,CAAC,KAAK,MAAM,IAAI,CAAC,KAAK,KAAK;QAAE,OAAO,MAAM,CAAC;IAC/C,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,SAAS,SAAS,CAAC,MAAc;IAC/B,IAAI,MAAM,KAAK,GAAG;QAAE,OAAO,WAAW,CAAC;IACvC,IAAI,MAAM,KAAK,GAAG,IAAI,MAAM,KAAK,GAAG;QAAE,OAAO,eAAe,CAAC;IAC7D,IAAI,MAAM,KAAK,GAAG;QAAE,OAAO,cAAc,CAAC;IAC1C,IAAI,MAAM,IAAI,GAAG;QAAE,OAAO,gBAAgB,CAAC;IAC3C,OAAO,WAAW,CAAC;AACrB,CAAC;AAED,2EAA2E;AAC3E,uEAAuE;AACvE,uEAAuE;AACvE,oEAAoE;AACpE,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,6BAA6B,EAAE,GAAG,CAAC;SAC3C,OAAO,CAAC,2BAA2B,EAAE,GAAG,CAAC;SACzC,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;SACxB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;SACpB,IAAI,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,kBAAkB,CACzB,GAAW,EACX,EAAiB,EACjB,SAAk
B,EAClB,cAAsB;IAEtB,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,cAAc,EAAE,SAAS,CAAC,CAAC;QACpD,OAAO,CAAC,IAAI,CAAC;YACX,OAAO,EAAE,kBAAkB,GAAG,gBAAgB,OAAO,EAAE;YACvD,WAAW,EAAE,sDAAsD,cAAc,GAAG;YACpF,MAAM,EAAE;gBACN,WAAW,EAAE;oBACX,KAAK,EAAE,OAAO;oBACd,WAAW,EAAE,8BAA8B;iBAC5C;aACF;SACF,CAAC,CAAC;IACL,CAAC;IAED,IAAI,EAAE,KAAK,MAAM,EAAE,CAAC;QAClB,OAAO,CAAC,IAAI,CAAC;YACX,OAAO,EAAE,kBAAkB,GAAG,YAAY;YAC1C,WAAW,EAAE,mDAAmD;SACjE,CAAC,CAAC;IACL,CAAC;IACD,IAAI,EAAE,KAAK,MAAM,EAAE,CAAC;QAClB,OAAO,CAAC,IAAI,CAAC;YACX,OAAO,EAAE,kBAAkB,GAAG,YAAY;YAC1C,WAAW,EAAE,sCAAsC;SACpD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,CAAC,IAAI,CAAC;QACX,OAAO,EAAE,uCAAuC;QAChD,WAAW,EACT,gHAAgH;KACnH,CAAC,CAAC;IAEH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,gBAAgB,CAAC,GAAW,EAAE,OAAe;IACpD,MAAM,OAAO,GAAsB;QACjC;YACE,OAAO,EAAE,kBAAkB,GAAG,EAAE;YAChC,WAAW,EAAE,2BAA2B;SACzC;KACF,CAAC;IACF,IAAI,OAAO,KAAK,eAAe,EAAE,CAAC;QAChC,OAAO,CAAC,IAAI,CAAC;YACX,OAAO,EAAE,0BAA0B;YACnC,WAAW,EAAE,8BAA8B;YAC3C,MAAM,EAAE;gBACN,IAAI,EAAE;oBACJ,WAAW,EAAE,4CAA4C;iBAC1D;aACF;SACF,CAAC,CAAC;IACL,CAAC;IACD,IACE,OAAO,KAAK,WAAW;QACvB,OAAO,KAAK,WAAW;QACvB,OAAO,KAAK,eAAe,EAC3B,CAAC;QACD,OAAO,CAAC,IAAI,CAAC;YACX,OAAO,EAAE,uCAAuC;YAChD,WAAW,EAAE,qDAAqD;SACnE,CAAC,CAAC;IACL,CAAC;IACD,OAAO,CAAC,IAAI,CAAC;QACX,OAAO,EAAE,iBAAiB;QAC1B,WAAW,EAAE,yCAAyC;KACvD,CAAC,CAAC;IACH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,SAAS,CAChB,YAAgD,EAChD,GAAe,EACf,GAAiB,EACjB,GAAW;IAEX,MAAM,GAAG,GAAiB;QACxB,GAAG,YAAY;QACf,YAAY,EAAE,gBAAgB,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,CAAC;QAC7C,KAAK,EAAE,GAAG;KACX,CAAC;IACF,OAAO,CAAC,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;IACzC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,SAAS,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;AACjD,CAAC;AAED,SAAS,WAAW,CAAC,IAAY;IAC/B,QAAQ,IAAI,EAAE,CAAC;QACb,KAAK,eAAe;YAClB,OAAO,EAAE,CAAC;QACZ,KAAK,cAAc,CAAC;QACpB,KAAK,eAAe;YAClB,OAAO,EAAE,CAAC;QACZ,KAAK,gBAAgB;YACnB,OAAO,EAAE,CAAC;QACZ,KAAK,WAAW,CAAC;QACjB,KAAK,eAAe;YAClB,OAAO,CAAC,CAAC;QACX;YACE,OAAO,CAAC,CAAC;IACb,CAAC;AACH,
CAAC"}