@woladi/sortai 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -14
- package/dist/cli.js +59 -284
- package/dist/commands/clear.js +55 -0
- package/dist/commands/init.js +32 -0
- package/dist/commands/organize.js +92 -0
- package/dist/commands/sample.js +69 -0
- package/dist/commands/tag.js +244 -0
- package/dist/config.js +30 -2
- package/dist/defaults.js +18 -0
- package/dist/llm/index.js +47 -4
- package/dist/llm/ollama-detect.js +35 -0
- package/dist/llm/prompt.js +31 -0
- package/dist/mask.js +27 -12
- package/dist/organize/move.js +20 -0
- package/dist/organize/plan.js +84 -0
- package/dist/organize/read-tags.js +18 -0
- package/dist/tags.js +32 -3
- package/dist/wizard/index.js +359 -0
- package/dist/wizard/languages.js +34 -0
- package/dist/wizard/refine.js +195 -0
- package/dist/wizard/sample.js +41 -0
- package/dist/wizard/taxonomy.js +57 -0
- package/package.json +8 -2
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import { select, input, confirm, editor } from '@inquirer/prompts';
|
|
3
|
+
import { preTagFromPath } from '../pretag.js';
|
|
4
|
+
import { mergeTags } from '../tags.js';
|
|
5
|
+
import { inferTagsAndComment } from '../llm/index.js';
|
|
6
|
+
import { applyTaxonomyToConfig, generateTaxonomy } from './taxonomy.js';
|
|
7
|
+
/**
 * Right-pads a string with spaces up to a minimum width.
 *
 * @param {string} s - Text to pad.
 * @param {number} w - Minimum width of the result, in UTF-16 code units.
 * @returns {string} `s` unchanged when it is already at least `w` long,
 *   otherwise `s` followed by enough spaces to reach `w`.
 */
function pad(s, w) {
    return s.padEnd(w);
}
|
|
10
|
+
/**
 * Renders a taxonomy as a plain-text table for the terminal.
 *
 * The table has one row per category: the tag name padded to a common
 * width, a strict marker, and up to three example strings. When the
 * taxonomy carries a summary it is appended after a blank line.
 * All labels are user-facing Polish strings.
 *
 * @param {{ categories: Array<{ name: string, examples: string[], isStrict: boolean }>, summary?: string }} tax
 * @returns {string} The rendered table, or a yellow placeholder when there
 *   are no categories.
 */
export function renderTaxonomyTable(tax) {
    const { categories } = tax;
    if (categories.length === 0) {
        return chalk.yellow('(brak kategorii)');
    }

    // The name column is at least 6 wide and grows to fit the longest tag.
    const nameW = Math.max(6, ...categories.map(({ name }) => name.length));

    const header = chalk.bold(`${pad('Tag', nameW)} Strict Przykłady`);
    // The separator never exceeds 80 characters, however long the names are.
    const separator = chalk.gray('─'.repeat(Math.min(80, nameW + 12 + 50)));

    const rows = categories.map((category) => {
        const shown = category.examples.slice(0, 3).join(', ');
        const examples = shown || chalk.gray('(brak)');
        const strict = category.isStrict ? chalk.yellow(' ✓ ') : ' ';
        return `${pad(category.name, nameW)} ${strict} ${examples}`;
    });

    const footer = tax.summary
        ? ['', chalk.gray(`Podsumowanie: ${tax.summary}`)]
        : [];

    return [header, separator, ...rows, ...footer].join('\n');
}
|
|
28
|
+
/**
 * Interactive editor for a single taxonomy category.
 *
 * Repeatedly shows a menu and applies the chosen change to `cat` in place
 * until the user picks "done" or "drop". Callers that must keep the
 * original untouched should pass a copy.
 *
 * @param {{ name: string, description?: string, aliases?: string[], strictEvidence?: string[], isStrict: boolean }} cat
 *   Category to edit; mutated in place.
 * @returns {Promise<object|null>} The edited `cat` when the user finishes,
 *   or `null` when the user chooses to delete the tag.
 */
async function editCategory(cat) {
    // Splits a comma-separated answer into trimmed, non-empty items.
    const parseCsv = (text) => text.split(',').map((item) => item.trim()).filter(Boolean);

    while (true) {
        // A category may arrive without these arrays (e.g. after hand-edited
        // JSON), so fall back to empty lists instead of crashing the prompt.
        const currentAliases = cat.aliases ?? [];
        const currentEvidence = cat.strictEvidence ?? [];

        const action = await select({
            message: `Edytuj ${chalk.cyan(cat.name)} (strict=${cat.isStrict}, aliasy=[${currentAliases.join(',')}])`,
            choices: [
                { name: 'Zmień nazwę', value: 'name' },
                { name: 'Zmień opis', value: 'desc' },
                { name: 'Zmień aliasy (CSV)', value: 'aliases' },
                { name: cat.isStrict ? 'Wyłącz strict' : 'Włącz strict', value: 'strict' },
                { name: 'Zmień strict_evidence (CSV)', value: 'evidence' },
                { name: chalk.red('Usuń tag'), value: 'drop' },
                { name: chalk.green('Gotowe'), value: 'done' },
            ],
        });

        switch (action) {
            case 'done':
                return cat;
            case 'drop':
                return null;
            case 'name': {
                const answer = await input({ message: 'Nowa nazwa (#Tag):', default: cat.name });
                const trimmed = answer.trim();
                if (trimmed === '' || trimmed === '#') {
                    // A blank answer would otherwise rename the tag to a bare "#".
                    process.stdout.write(chalk.yellow('Pusta nazwa — pozostawiono bez zmian.\n'));
                }
                else {
                    cat.name = trimmed.startsWith('#') ? trimmed : `#${trimmed}`;
                }
                break;
            }
            case 'desc':
                cat.description = await input({ message: 'Opis:', default: cat.description });
                break;
            case 'aliases': {
                const answer = await input({ message: 'Aliasy (oddzielone przecinkiem):', default: currentAliases.join(',') });
                cat.aliases = parseCsv(answer).map((alias) => (alias.startsWith('#') ? alias : `#${alias}`));
                break;
            }
            case 'strict':
                cat.isStrict = !cat.isStrict;
                break;
            case 'evidence': {
                const answer = await input({ message: 'Słowa-dowody (oddzielone przecinkiem):', default: currentEvidence.join(',') });
                cat.strictEvidence = parseCsv(answer);
                break;
            }
            default:
                // Unknown menu value: show the menu again.
                break;
        }
    }
}
|
|
66
|
+
/**
 * Prints a dry-run preview of how up to ten sample files would be tagged
 * under the given taxonomy. Nothing is persisted; output goes to stdout.
 *
 * For each sample the path-derived tags are computed first. When there are
 * at least four of them and the sample has no OCR text, they are merged
 * with the config's auto tag and the LLM is skipped. Otherwise the LLM is
 * asked; if that call fails the preview falls back to the path-derived
 * tags rather than aborting.
 *
 * @param {object} taxonomy - Taxonomy to apply on top of `baseCfg`.
 * @param {Array<{ path: string, name: string, ext: string, ocrText: string }>} samples
 * @param {object} baseCfg - Base configuration passed to `applyTaxonomyToConfig`.
 * @returns {Promise<void>}
 */
async function showSamplePreview(taxonomy, samples, baseCfg) {
    const cfg = applyTaxonomyToConfig(taxonomy, baseCfg);

    // Decides tags + comment for one sample (path-only shortcut or LLM).
    const resolveTagging = async (sample, preTags) => {
        const pathAloneSuffices = preTags.length >= 4 && !sample.ocrText.trim();
        if (pathAloneSuffices) {
            return {
                tags: mergeTags(cfg, preTags, [cfg.tags.autoTag]),
                comment: `Auto z nazwy/ścieżki: ${sample.name}.`,
            };
        }
        try {
            const request = { fileName: sample.name, ext: sample.ext, preTags, ocrText: sample.ocrText };
            const { tags, comment } = await inferTagsAndComment(request, cfg);
            return { tags, comment };
        }
        catch {
            // Best effort: a failed LLM call must not break the preview.
            return { tags: preTags, comment: `Plik: ${sample.name}.` };
        }
    };

    process.stdout.write('\n' + chalk.cyan('🔍 Podgląd tagowania na próbce (bez zapisu):\n\n'));

    for (const sample of samples.slice(0, 10)) {
        const preTags = preTagFromPath(sample.path, sample.ocrText, cfg);
        process.stdout.write(chalk.bold(` ${sample.name}\n`));

        const { tags, comment } = await resolveTagging(sample, preTags);

        // Green for two or more tags, yellow for exactly one, red for none.
        let paint = chalk.red;
        if (tags.length >= 2) {
            paint = chalk.green;
        }
        else if (tags.length === 1) {
            paint = chalk.yellow;
        }
        process.stdout.write(paint(` ${tags.join(' ') || '(brak tagów)'}\n`));
        process.stdout.write(chalk.gray(` ${comment}\n`));
    }

    process.stdout.write('\n');
}
|
|
95
|
+
/**
 * Checks one element of a hand-edited category list and fills in defaults.
 *
 * @param {unknown} raw - One element of the JSON array typed in the editor.
 * @returns {object|null} A category whose `description`, `aliases`,
 *   `strictEvidence`, `isStrict` and `examples` are all present and well
 *   typed (non-string list items are dropped), or `null` when `raw` is not
 *   an object with a non-blank string `name`.
 */
function normalizeEditedCategory(raw) {
    if (raw === null || typeof raw !== 'object' || Array.isArray(raw)) {
        return null;
    }
    if (typeof raw.name !== 'string' || raw.name.trim() === '') {
        return null;
    }
    const stringList = (value) => (Array.isArray(value) ? value.filter((item) => typeof item === 'string') : []);
    return {
        ...raw,
        description: typeof raw.description === 'string' ? raw.description : '',
        aliases: stringList(raw.aliases),
        strictEvidence: stringList(raw.strictEvidence),
        isStrict: raw.isStrict === true,
        examples: stringList(raw.examples),
    };
}

/**
 * Interactive loop that lets the user refine a taxonomy until they accept it.
 *
 * Each pass prints the current taxonomy table and offers: accept, edit one
 * tag, add a tag, edit all categories as JSON in an external editor,
 * regenerate via the LLM with a hint, or preview tagging on the samples.
 * The `initial` object is not mutated; every change produces a new
 * taxonomy object.
 *
 * @param {{ categories: object[], summary?: string }} initial - Starting taxonomy.
 * @param {{ samples: object[], langs: unknown, userContext: unknown, baseCfg: object }} deps
 *   Inputs forwarded to `generateTaxonomy` (regenerate) and
 *   `showSamplePreview` (preview).
 * @returns {Promise<object>} The taxonomy at the moment the user accepts it.
 */
export async function refineTaxonomyLoop(initial, deps) {
    // Splits a comma-separated answer into trimmed, non-empty items.
    const parseCsv = (text) => text.split(',').map((item) => item.trim()).filter(Boolean);
    const withHash = (tag) => (tag.startsWith('#') ? tag : `#${tag}`);

    let taxonomy = { ...initial, categories: [...initial.categories] };
    while (true) {
        process.stdout.write('\n' + renderTaxonomyTable(taxonomy) + '\n\n');
        const action = await select({
            message: 'Co dalej z taksonomią?',
            choices: [
                { name: chalk.green('Akceptuj i zapisz config'), value: 'accept' },
                { name: 'Edytuj per-tag (rename / strict / drop / evidence)', value: 'edit' },
                { name: 'Dodaj nowy tag ręcznie', value: 'add' },
                { name: 'Otwórz w edytorze ($EDITOR)', value: 'editor' },
                { name: 'Regeneruj LLM-em z dodatkową wskazówką', value: 'regen' },
                { name: 'Pokaż jak otaguje próbkę z tym configiem', value: 'preview' },
            ],
        });

        if (action === 'accept') {
            return taxonomy;
        }

        if (action === 'edit') {
            if (taxonomy.categories.length === 0) {
                process.stdout.write(chalk.yellow('Brak tagów do edycji.\n'));
                continue;
            }
            const chosen = await select({
                message: 'Który tag edytować?',
                choices: taxonomy.categories.map((c, i) => ({ name: `${c.name} (${c.isStrict ? 'strict' : 'safe'})`, value: i })),
            });
            // Edit a copy so a half-finished edit never touches the live list.
            const edited = await editCategory({ ...taxonomy.categories[chosen] });
            const next = [...taxonomy.categories];
            if (edited === null) {
                next.splice(chosen, 1);
            }
            else {
                next[chosen] = edited;
            }
            taxonomy = { ...taxonomy, categories: next };
        }
        else if (action === 'add') {
            const name = (await input({ message: 'Nazwa tagu (#Tag):' })).trim();
            // Blank or "#"-only input would create an unusable tag; skip it.
            if (name === '' || name === '#') {
                continue;
            }
            const description = await input({ message: 'Opis (opcjonalnie):' });
            const isStrict = await confirm({ message: 'Tag strict (wymaga słów-dowodów)?', default: false });
            const evidence = isStrict
                ? parseCsv(await input({ message: 'Słowa-dowody (CSV):' }))
                : [];
            const aliases = parseCsv(await input({ message: 'Aliasy (CSV, opcjonalnie):' })).map(withHash);
            taxonomy = {
                ...taxonomy,
                categories: [
                    ...taxonomy.categories,
                    { name: withHash(name), description, aliases, strictEvidence: evidence, isStrict, examples: [] },
                ],
            };
        }
        else if (action === 'editor') {
            const draft = JSON.stringify(taxonomy.categories, null, 2);
            const edited = await editor({
                message: 'Edytuj kategorie jako JSON. Zapisz i zamknij edytor żeby kontynuować.',
                default: draft,
                postfix: '.json',
            });
            try {
                const parsed = JSON.parse(edited);
                if (!Array.isArray(parsed)) {
                    process.stdout.write(chalk.red('Nie tablica — anulowano.\n'));
                }
                else {
                    // Reject the whole edit if any element is unusable: the
                    // table renderer reads name/examples on every category
                    // and would otherwise throw on the next pass.
                    const normalized = parsed.map(normalizeEditedCategory);
                    const badIndex = normalized.indexOf(null);
                    if (badIndex === -1) {
                        taxonomy = { ...taxonomy, categories: normalized };
                    }
                    else {
                        process.stdout.write(chalk.red(`Element ${badIndex} nie jest poprawną kategorią (wymagane niepuste pole "name") — anulowano.\n`));
                    }
                }
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                process.stdout.write(chalk.red(`Błąd JSON: ${msg} — anulowano.\n`));
            }
        }
        else if (action === 'regen') {
            const hint = await input({
                message: 'Wskazówka dla LLM (np. "więcej kategorii finansowych", "rozdziel CV i kariera"):',
            });
            try {
                taxonomy = await generateTaxonomy(deps.samples, deps.langs, deps.userContext, deps.baseCfg, hint);
            }
            catch (err) {
                // Keep the current taxonomy when regeneration fails.
                const msg = err instanceof Error ? err.message : String(err);
                process.stdout.write(chalk.red(`Regeneracja nieudana: ${msg}\n`));
            }
        }
        else if (action === 'preview') {
            try {
                await showSamplePreview(taxonomy, deps.samples, deps.baseCfg);
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                process.stdout.write(chalk.red(`Podgląd nieudany: ${msg}\n`));
            }
        }
    }
}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { walkFiles } from '../walker.js';
|
|
3
|
+
import { extractOcrText } from '../ocr.js';
|
|
4
|
+
/**
 * Returns a shuffled copy of `arr` using a Fisher–Yates pass driven by a
 * small linear congruential generator, so the same seed always yields the
 * same order. The input array is not modified.
 *
 * @param {Array} arr - Items to shuffle.
 * @param {number} [seed] - Generator seed. When omitted (`null`/`undefined`)
 *   or not a finite number, the current time is used, which makes the
 *   order vary between runs.
 * @returns {Array} A new array holding the same items in shuffled order.
 */
function shuffle(arr, seed) {
    const MODULUS = 233280;
    const out = arr.slice();
    let s = Number(seed ?? Date.now());
    if (!Number.isFinite(s)) {
        // A NaN state would produce NaN indices and corrupt the output.
        s = Date.now();
    }
    for (let i = out.length - 1; i > 0; i--) {
        s = (s * 9301 + 49297) % MODULUS;
        if (s < 0) {
            // `%` keeps the dividend's sign; a negative state would give a
            // negative index and swap `undefined` into the result.
            s += MODULUS;
        }
        const j = Math.floor((s / MODULUS) * (i + 1));
        [out[i], out[j]] = [out[j], out[i]];
    }
    return out;
}
|
|
14
|
+
/**
 * Picks a random subset of the files found under `root`.
 *
 * @param {string} root - Directory handed to `walkFiles`.
 * @param {object} cfg - Configuration; `cfg.scan.ocrExtensions` lists the
 *   lower-case extensions (with leading dot) considered OCR-eligible.
 * @param {{ ocrEligibleOnly?: boolean, seed?: number, count?: number }} opts
 *   `ocrEligibleOnly` restricts the pool to OCR-eligible extensions,
 *   `seed` is forwarded to `shuffle`, `count` caps the result length.
 * @returns {Promise<string[]>} Up to `opts.count` file paths in shuffled order.
 */
export async function pickSampleFiles(root, cfg, opts) {
    const candidates = await walkFiles(root, cfg);
    const ocrExtensions = new Set(cfg.scan.ocrExtensions);

    let pool = candidates;
    if (opts.ocrEligibleOnly) {
        // Extension matching is case-insensitive on the file side.
        pool = candidates.filter((file) => ocrExtensions.has(path.extname(file).toLowerCase()));
    }

    const shuffled = shuffle(pool, opts.seed);
    return shuffled.slice(0, opts.count);
}
|
|
22
|
+
/**
 * Builds sample records for a list of files, running OCR one file at a
 * time on those whose extension is listed in `cfg.scan.ocrExtensions`.
 *
 * @param {string[]} files - File paths to process, in order.
 * @param {object} cfg - Configuration; also forwarded to `extractOcrText`.
 * @param {(done: number, total: number, file: string) => void} [onProgress]
 *   Called before each file with its index, and once more at the end with
 *   `done === total` and an empty file name.
 * @returns {Promise<Array<{ path: string, name: string, ext: string, ocrText: string }>>}
 *   One record per input file; `ocrText` is `''` for files that are not
 *   OCR-eligible.
 */
export async function ocrSamples(files, cfg, onProgress) {
    const records = [];

    for (const [index, file] of files.entries()) {
        const ext = path.extname(file).toLowerCase();
        onProgress?.(index, files.length, file);

        const isOcrEligible = cfg.scan.ocrExtensions.includes(ext);
        const ocrText = isOcrEligible ? await extractOcrText(file, cfg) : '';

        records.push({
            path: file,
            name: path.basename(file),
            ext,
            ocrText,
        });
    }

    onProgress?.(files.length, files.length, '');
    return records;
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { inferTaxonomy } from '../llm/index.js';
|
|
2
|
+
/**
 * Asks the LLM layer for a taxonomy. This is a thin pass-through to
 * `inferTaxonomy`; all arguments are forwarded unchanged and in order.
 *
 * @param {object[]} samples - Sample records to base the taxonomy on.
 * @param {unknown} langs - Forwarded as-is.
 * @param {unknown} userContext - Forwarded as-is.
 * @param {object} cfg - Configuration, forwarded as-is.
 * @param {string} [hint] - Optional extra guidance, forwarded as-is.
 * @returns {Promise<object>} Whatever `inferTaxonomy` resolves to.
 */
export async function generateTaxonomy(samples, langs, userContext, cfg, hint) {
    const taxonomy = await inferTaxonomy(samples, langs, userContext, cfg, hint);
    return taxonomy;
}
|
|
5
|
+
/** Tags that are always allowed, whatever the taxonomy contains. */
const ALWAYS_ALLOWED_TAGS = Object.freeze([
    '#Duplikat',
    '#PrawdopodobnaKopia',
    '#Skan',
    '#Screenshot',
    '#Foto',
    '#Grafika',
    '#Email',
    '#Nagranie',
]);

/**
 * Produces a new config whose tag settings reflect the given taxonomy.
 *
 * - `tags.allowed` becomes the category names, then the config's auto tag,
 *   then the always-allowed tags (duplicates removed, order preserved).
 * - `tags.strict` becomes the names of the strict categories.
 * - `tags.aliases` and `tags.strictEvidence` keep the config's existing
 *   entries, with the taxonomy's entries added on top.
 *
 * Neither `taxonomy` nor `cfg` is mutated. Categories that lack an
 * `aliases` or `strictEvidence` array (possible after hand-editing the
 * category JSON) are treated as having empty lists.
 *
 * @param {{ categories: Array<{ name: string, aliases?: string[], isStrict?: boolean, strictEvidence?: string[] }> }} taxonomy
 * @param {{ tags: { autoTag: string, aliases?: object, strictEvidence?: object } }} cfg
 * @returns {object} A copy of `cfg` with an updated `tags` section.
 */
export function applyTaxonomyToConfig(taxonomy, cfg) {
    const allowed = new Set();
    const strict = [];
    const aliases = {};
    const strictEvidence = {};

    for (const cat of taxonomy.categories) {
        allowed.add(cat.name);

        for (const alias of cat.aliases ?? []) {
            // An alias identical to its own tag would be a no-op mapping.
            if (alias !== cat.name) {
                aliases[alias] = cat.name;
            }
        }

        if (cat.isStrict) {
            strict.push(cat.name);
            const evidence = cat.strictEvidence ?? [];
            if (evidence.length > 0) {
                strictEvidence[cat.name] = evidence;
            }
        }
    }

    allowed.add(cfg.tags.autoTag);
    for (const tag of ALWAYS_ALLOWED_TAGS) {
        allowed.add(tag);
    }

    return {
        ...cfg,
        tags: {
            ...cfg.tags,
            allowed: [...allowed],
            strict,
            aliases: { ...cfg.tags.aliases, ...aliases },
            strictEvidence: { ...cfg.tags.strictEvidence, ...strictEvidence },
        },
    };
}
|
|
43
|
+
/**
 * Reconstructs a taxonomy from an existing config's tag settings.
 *
 * Every allowed tag except the auto tag becomes a category. Its aliases
 * are the alias keys that map to it, its strict evidence is looked up by
 * name (empty when absent), and it is strict when listed in `tags.strict`.
 * Descriptions, examples and the summary are left empty.
 *
 * @param {{ tags: { allowed: string[], autoTag: string, aliases: object, strictEvidence: object, strict: string[] } }} cfg
 * @returns {{ categories: object[], summary: string }}
 */
export function taxonomyFromConfig(cfg) {
    const { tags } = cfg;
    const categories = [];

    for (const name of tags.allowed) {
        if (name === tags.autoTag) {
            continue;
        }

        const aliases = [];
        for (const [alias, target] of Object.entries(tags.aliases)) {
            if (target === name) {
                aliases.push(alias);
            }
        }

        categories.push({
            name,
            description: '',
            aliases,
            strictEvidence: tags.strictEvidence[name] ?? [],
            isStrict: tags.strict.includes(name),
            examples: [],
        });
    }

    return { categories, summary: '' };
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@woladi/sortai",
|
|
3
|
-
"version": "0.1.1",
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Automatically tag and describe your files using Apple Vision OCR + local Ollama or cloud LLM — writes native Finder tags and comments searchable in Spotlight",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/cli.js",
|
|
@@ -16,6 +16,9 @@
|
|
|
16
16
|
"build": "tsc",
|
|
17
17
|
"start": "node dist/cli.js",
|
|
18
18
|
"typecheck": "tsc --noEmit",
|
|
19
|
+
"test": "vitest run",
|
|
20
|
+
"test:watch": "vitest",
|
|
21
|
+
"test:e2e": "expect tests/e2e/wizard.expect",
|
|
19
22
|
"prepublishOnly": "npm run build"
|
|
20
23
|
},
|
|
21
24
|
"keywords": [
|
|
@@ -41,6 +44,7 @@
|
|
|
41
44
|
},
|
|
42
45
|
"dependencies": {
|
|
43
46
|
"@anthropic-ai/sdk": "^0.30.1",
|
|
47
|
+
"@inquirer/prompts": "^8.4.3",
|
|
44
48
|
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
45
49
|
"chalk": "^5.3.0",
|
|
46
50
|
"commander": "^12.1.0",
|
|
@@ -49,8 +53,10 @@
|
|
|
49
53
|
"zod": "^3.23.8"
|
|
50
54
|
},
|
|
51
55
|
"devDependencies": {
|
|
56
|
+
"@inquirer/testing": "^3.3.6",
|
|
52
57
|
"@types/node": "^20.19.39",
|
|
53
|
-
"typescript": "^5.4.5"
|
|
58
|
+
"typescript": "^5.4.5",
|
|
59
|
+
"vitest": "^4.1.6"
|
|
54
60
|
},
|
|
55
61
|
"engines": {
|
|
56
62
|
"node": ">=20.0.0"
|