@woladi/sortai 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ import path from 'node:path';
2
+ import { existsSync } from 'node:fs';
3
+ import chalk from 'chalk';
4
+ import ora from 'ora';
5
+ import { expandHome, loadConfig, configExists } from '../config.js';
6
+ import { buildOrganizePlan } from '../organize/plan.js';
7
+ import { executeMove } from '../organize/move.js';
8
+ import { runWizard } from '../wizard/index.js';
/**
 * Print a preview of an organize plan to stdout.
 *
 * Shows at most 50 planned moves (source relative to `root`, destination as
 * stored in the plan, and either the primary tag or an `_unsorted` marker),
 * at most 10 skipped entries with their reasons, and a warning line when the
 * plan reports a positive `conflicts` count.
 *
 * @param {object} plan - Plan exposing `moves`, `skips` and `conflicts`.
 * @param {string} root - Folder that displayed source paths are made relative to.
 */
function renderPlan(plan, root) {
    const MOVE_PREVIEW_LIMIT = 50;
    const SKIP_PREVIEW_LIMIT = 10;
    const print = (text) => process.stdout.write(text);

    const moveCount = plan.moves.length;
    print(chalk.bold(`📋 Plan przenoszenia (${moveCount} plików):\n\n`));
    plan.moves.slice(0, MOVE_PREVIEW_LIMIT).forEach((move) => {
        const source = path.relative(root, move.from);
        const label = move.primaryTag
            ? chalk.cyan(` [${move.primaryTag}]`)
            : chalk.gray(' [_unsorted]');
        print(` ${source}\n → ${move.to}${label}\n`);
    });
    if (moveCount > MOVE_PREVIEW_LIMIT) {
        print(chalk.gray(` … i ${moveCount - MOVE_PREVIEW_LIMIT} więcej\n`));
    }

    const skipCount = plan.skips.length;
    if (skipCount > 0) {
        print('\n' + chalk.gray(`Pominięte (${skipCount}):\n`));
        plan.skips.slice(0, SKIP_PREVIEW_LIMIT).forEach((skip) => {
            print(chalk.gray(` ${path.relative(root, skip.path)} — ${skip.reason}\n`));
        });
        if (skipCount > SKIP_PREVIEW_LIMIT) {
            print(chalk.gray(` … i ${skipCount - SKIP_PREVIEW_LIMIT} więcej\n`));
        }
    }

    if (plan.conflicts > 0) {
        print('\n' + chalk.yellow(`⚠️ ${plan.conflicts} konfliktów nazw — dodano sufiksy _2, _3 itd.\n`));
    }
}
/**
 * CLI handler for the organize command.
 *
 * Flow: when no config file exists the wizard is started and the command
 * returns. Otherwise the config is loaded (optionally overriding
 * `organize.target` from `opts.target`, which also forces `organize.enabled`),
 * a move plan is built and printed, and - only when `opts.apply` is set and
 * `opts.dryRun` is not - every planned move is executed.
 *
 * Exit behaviour: a config error, a missing folder or a failed plan terminates
 * the process with status 1. Individual move failures are reported and
 * counted; when at least one move failed, `process.exitCode` is set to 1 so
 * scripts can detect a partial failure.
 *
 * @param {string|undefined} folder - Folder to organize; falls back to `cfg.scan.folder`.
 * @param {object} opts - CLI options (`config`, `target`, `apply`, `dryRun`, `verbose`).
 * @returns {Promise<void>}
 */
export async function organizeCommand(folder, opts) {
    if (!(await configExists(opts.config))) {
        process.stdout.write(chalk.cyan('🪄 Brak configu — uruchamiam wizard…\n\n'));
        await runWizard({ configPath: opts.config, folderHint: folder });
        return;
    }
    // Same guard as tagCommand: an unreadable or invalid config becomes a
    // one-line error instead of an unhandled rejection.
    let loaded;
    try {
        loaded = await loadConfig(opts.config);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        process.stderr.write(chalk.red(`Config error: ${msg}\n`));
        process.exit(1);
    }
    const { config } = loaded;
    const cfg = opts.target
        ? { ...config, organize: { ...config.organize, target: opts.target, enabled: true } }
        : config;
    const root = path.resolve(expandHome(folder ?? cfg.scan.folder));
    if (!existsSync(root)) {
        process.stderr.write(chalk.red(`Folder nie istnieje: ${root}\n`));
        process.exit(1);
    }
    process.stdout.write(chalk.cyan(`📂 Sortowanie ${root} → ${expandHome(cfg.organize.target)}\n`));
    process.stdout.write(chalk.gray(` Strategia: ${cfg.organize.strategy}, brak tagów: ${cfg.organize.unsorted}\n\n`));
    const spin = ora('Czytam tagi i buduję plan…').start();
    let plan;
    try {
        plan = await buildOrganizePlan(root, cfg);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        spin.fail(`Plan failed: ${msg}`);
        process.exit(1);
    }
    spin.succeed(`Plan: ${plan.moves.length} przenosin, ${plan.skips.length} pominięć`);
    renderPlan(plan, root);
    // Without --apply (or with --dry-run) the plan is only shown, never executed.
    if (!opts.apply || opts.dryRun) {
        process.stdout.write('\n' + chalk.yellow('🔍 Dry-run — żadne pliki nie zostały przeniesione.\n'));
        process.stdout.write(chalk.gray(' Uruchom z --apply żeby wykonać.\n'));
        return;
    }
    if (plan.moves.length === 0) {
        process.stdout.write('\n' + chalk.gray('Nic do przenoszenia.\n'));
        return;
    }
    process.stdout.write('\n' + chalk.bold('🚚 Wykonuję przenoszenia…\n'));
    let ok = 0;
    let errors = 0;
    for (const m of plan.moves) {
        try {
            await executeMove(m);
            ok++;
            if (opts.verbose) {
                process.stdout.write(chalk.gray(` ✓ ${path.basename(m.from)}\n`));
            }
        }
        catch (err) {
            // A single failed move must not stop the remaining ones.
            const msg = err instanceof Error ? err.message : String(err);
            process.stdout.write(chalk.red(` ✗ ${path.basename(m.from)}: ${msg}\n`));
            errors++;
        }
    }
    process.stdout.write('\n' + chalk.bold('✨ Done\n'));
    process.stdout.write(chalk.green(` ✓ Przeniesione: ${ok}\n`));
    if (errors) {
        process.stdout.write(chalk.red(` ✗ Błędy: ${errors}\n`));
        // Signal partial failure to the calling shell without cutting output short.
        process.exitCode = 1;
    }
}
@@ -0,0 +1,69 @@
1
+ import path from 'node:path';
2
+ import { existsSync } from 'node:fs';
3
+ import chalk from 'chalk';
4
+ import ora from 'ora';
5
+ import { expandHome, loadConfig, configExists } from '../config.js';
6
+ import { preTagFromPath } from '../pretag.js';
7
+ import { mergeTags } from '../tags.js';
8
+ import { inferTagsAndComment } from '../llm/index.js';
9
+ import { pickSampleFiles, ocrSamples } from '../wizard/sample.js';
10
+ import { runWizard } from '../wizard/index.js';
/**
 * CLI handler for the sample command: tag a small sample of files and print
 * the result, without writing any metadata.
 *
 * Flow: when no config file exists the wizard is started and the command
 * returns. Otherwise up to `opts.count` (default 20) files are picked with
 * `ocrEligibleOnly: true`, OCR is run on them, and for each sample the tags
 * and comment are derived either from path/name pre-tags alone (when there
 * are at least 4 pre-tags and no OCR text) or from the LLM. If the LLM call
 * fails, the reason is printed and the pre-tags are used.
 *
 * A sample counts as "ok" when it has at least one tag other than
 * `cfg.tags.autoTag`, otherwise as "weak".
 *
 * Exit behaviour: a config error, a missing folder or a failed OCR batch
 * terminates the process with status 1.
 *
 * @param {string|undefined} folder - Folder to sample; falls back to `cfg.scan.folder`.
 * @param {object} opts - CLI options (`config`, `count`).
 * @returns {Promise<void>}
 */
export async function sampleCommand(folder, opts) {
    if (!(await configExists(opts.config))) {
        process.stdout.write(chalk.cyan('🪄 Brak configu — uruchamiam wizard…\n\n'));
        await runWizard({ configPath: opts.config, folderHint: folder });
        return;
    }
    // Same guard as tagCommand: report an invalid config as a one-line error.
    let loaded;
    try {
        loaded = await loadConfig(opts.config);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        process.stderr.write(chalk.red(`Config error: ${msg}\n`));
        process.exit(1);
    }
    const { config: cfg } = loaded;
    const root = path.resolve(expandHome(folder ?? cfg.scan.folder));
    if (!existsSync(root)) {
        process.stderr.write(chalk.red(`Folder nie istnieje: ${root}\n`));
        process.exit(1);
    }
    const n = opts.count ?? 20;
    const files = await pickSampleFiles(root, cfg, { count: n, ocrEligibleOnly: true });
    if (files.length === 0) {
        process.stdout.write(chalk.yellow('Brak plików do próbkowania.\n'));
        return;
    }
    const spin = ora(`OCR ${files.length} próbek…`).start();
    let samples;
    try {
        samples = await ocrSamples(files, cfg, (done, total) => {
            spin.text = `OCR ${done}/${total}`;
        });
    }
    catch (err) {
        // Stop the spinner with a readable message instead of leaving it running.
        const msg = err instanceof Error ? err.message : String(err);
        spin.fail(`OCR failed: ${msg}`);
        process.exit(1);
    }
    spin.succeed('OCR gotowy');
    let ok = 0;
    let weak = 0;
    for (const s of samples) {
        process.stdout.write(chalk.bold(`\n${s.name}\n`));
        const preTags = preTagFromPath(s.path, s.ocrText, cfg);
        let tags;
        let comment;
        if (preTags.length >= 4 && !s.ocrText.trim()) {
            // Enough evidence from name/path and nothing to read: skip the LLM.
            tags = mergeTags(cfg, preTags, [cfg.tags.autoTag]);
            comment = `Auto z nazwy/ścieżki: ${s.name}.`;
        }
        else {
            try {
                const res = await inferTagsAndComment({ fileName: s.name, ext: s.ext, preTags, ocrText: s.ocrText }, cfg);
                tags = res.tags;
                comment = res.comment;
            }
            catch (err) {
                // Best-effort fallback, but say why the LLM result is missing.
                const msg = err instanceof Error ? err.message : String(err);
                process.stdout.write(chalk.yellow(` ⏱ ${msg} — fallback\n`));
                tags = preTags;
                comment = `Plik: ${s.name}.`;
            }
        }
        const meaningful = tags.filter(t => t !== cfg.tags.autoTag);
        const color = meaningful.length >= 2 ? chalk.green : meaningful.length === 1 ? chalk.yellow : chalk.red;
        if (meaningful.length >= 1) {
            ok++;
        }
        else {
            weak++;
        }
        process.stdout.write(color(` ${tags.join(' ') || '(brak)'}\n`));
        process.stdout.write(chalk.gray(` ${comment}\n`));
    }
    process.stdout.write('\n');
    process.stdout.write(chalk.bold('✨ Sample done\n'));
    process.stdout.write(chalk.green(` ✓ Otagowane: ${ok}\n`));
    process.stdout.write(chalk.red(` ⚠ Słabe: ${weak}\n`));
}
@@ -0,0 +1,244 @@
1
+ import path from 'node:path';
2
+ import { existsSync } from 'node:fs';
3
+ import { execFile } from 'node:child_process';
4
+ import { promisify } from 'node:util';
5
+ import chalk from 'chalk';
6
+ import ora from 'ora';
7
+ import { expandHome, loadConfig, configExists } from '../config.js';
8
+ import { walkFiles } from '../walker.js';
9
+ import { extractOcrText } from '../ocr.js';
10
+ import { preTagFromPath } from '../pretag.js';
11
+ import { mergeTags, TagDiscovery } from '../tags.js';
12
+ import { writeFileMetadata } from '../macos.js';
13
+ import { Masker } from '../mask.js';
14
+ import { inferTagsAndComment } from '../llm/index.js';
15
+ import { findDuplicates } from '../dedup.js';
16
+ import { runWizard } from '../wizard/index.js';
17
+ const execFileAsync = promisify(execFile);
/**
 * Build the effective configuration by layering CLI options and environment
 * variables over the loaded config. The input objects are not mutated.
 *
 * API key precedence (first defined wins):
 *   1. `opts.apiKey`
 *   2. `SORTAI_API_KEY`
 *   3. the provider-specific variable (`ANTHROPIC_API_KEY` / `OPENAI_API_KEY`)
 *      for the EFFECTIVE provider - i.e. `opts.cloud` when given, otherwise
 *      the provider from the config file
 *   4. `cfg.llm.apiKey`
 *
 * Model precedence: `opts.model`, then the built-in default for `opts.cloud`
 * (when `opts.cloud` is given and known), then `cfg.llm.model`.
 *
 * @param {object} cfg - Loaded configuration.
 * @param {object} opts - CLI options (`apiKey`, `cloud`, `model`, `ollamaUrl`,
 *   `exclude`, `mask`, `lang`, `dedup`, `free`).
 * @returns {object} New configuration object with overrides applied.
 */
function applyOverrides(cfg, opts) {
    const provider = opts.cloud ?? cfg.llm.provider;
    // Key the env lookup on the effective provider, not only on --cloud, so a
    // provider chosen in the config file also picks up its conventional variable.
    const apiKey = opts.apiKey
        ?? process.env.SORTAI_API_KEY
        ?? (provider === 'anthropic' ? process.env.ANTHROPIC_API_KEY : undefined)
        ?? (provider === 'openai' ? process.env.OPENAI_API_KEY : undefined)
        ?? cfg.llm.apiKey;
    const defaultCloudModels = {
        anthropic: 'claude-sonnet-4-6',
        openai: 'gpt-4o-mini',
    };
    return {
        ...cfg,
        scan: {
            ...cfg.scan,
            // --exclude is a comma-separated list; blank entries are dropped.
            excludeFolders: opts.exclude
                ? opts.exclude.split(',').map(s => s.trim()).filter(Boolean)
                : cfg.scan.excludeFolders,
        },
        llm: {
            ...cfg.llm,
            provider,
            model: opts.model ?? (opts.cloud ? defaultCloudModels[opts.cloud] ?? cfg.llm.model : cfg.llm.model),
            ollamaUrl: opts.ollamaUrl ?? cfg.llm.ollamaUrl,
            apiKey,
        },
        mask: {
            ...cfg.mask,
            enabled: opts.mask ?? cfg.mask.enabled,
            lang: opts.lang ?? cfg.mask.lang,
        },
        dedup: {
            ...cfg.dedup,
            enabled: opts.dedup ?? cfg.dedup.enabled,
        },
        tags: {
            ...cfg.tags,
            freeForm: opts.free ?? cfg.tags.freeForm,
        },
    };
}
/**
 * CLI handler for the tag command: scan a folder, derive tags and a comment
 * for every file and write them through `writeFileMetadata`.
 *
 * Flow:
 *  1. When no config exists, run the wizard; stop unless it reports
 *     `shouldRunTag`.
 *  2. Load the config, apply CLI/env overrides, validate API key and folder.
 *  3. When masking is enabled and the provider is not `ollama`, connect the
 *     masker; a failed connection is reported and processing continues
 *     without masking.
 *  4. Collect files, optionally dropping files whose `kMDItemUserTags`
 *     (read via `mdls`) already contain the auto tag, apply `opts.limit`,
 *     optionally hash for duplicates.
 *  5. Per file: OCR when the extension is OCR-eligible, compute pre-tags, then
 *     either use pre-tags alone (at least 4 pre-tags and no OCR text) or ask
 *     the LLM with a 180 s per-file timeout, falling back to pre-tags on
 *     failure. With `opts.dryRun` the result is only printed.
 *  6. Print a summary and, in free-form mode, the newly proposed tags.
 *
 * The masker is always closed, even when scanning or processing throws. An
 * exception from OCR on one file is counted as an error for that file and the
 * run continues with the next one.
 *
 * @param {string|undefined} folder - Folder to scan; falls back to the wizard
 *   result or `cfg.scan.folder`.
 * @param {object} opts - CLI options (`config`, `dryRun`, `skipTagged`,
 *   `limit`, plus everything read by `applyOverrides`).
 * @returns {Promise<void>}
 */
export async function tagCommand(folder, opts) {
    if (!(await configExists(opts.config))) {
        process.stdout.write(chalk.cyan('🪄 Brak configu — uruchamiam interaktywny wizard…\n\n'));
        const result = await runWizard({ configPath: opts.config, folderHint: folder });
        if (!result.shouldRunTag) {
            return;
        }
        folder = folder ?? result.config.scan.folder;
    }
    let cfgResult;
    try {
        cfgResult = await loadConfig(opts.config);
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        process.stderr.write(chalk.red(`Config error: ${msg}\n`));
        process.exit(1);
    }
    const cfg = applyOverrides(cfgResult.config, opts);
    if (cfg.llm.provider !== 'ollama' && !cfg.llm.apiKey) {
        process.stderr.write(chalk.red(`Brak klucza API dla ${cfg.llm.provider}. Podaj --api-key lub ustaw env.\n`));
        process.exit(1);
    }
    const rawFolder = folder ?? cfg.scan.folder;
    const root = path.resolve(expandHome(rawFolder));
    if (!existsSync(root)) {
        process.stderr.write(chalk.red(`Folder nie istnieje: ${root}\n`));
        process.exit(1);
    }
    let masker;
    if (cfg.mask.enabled && cfg.llm.provider !== 'ollama') {
        masker = new Masker(cfg);
        const spin = ora('Starting pseudonym-mcp…').start();
        try {
            await masker.connect();
            spin.succeed('pseudonym-mcp ready');
        }
        catch (err) {
            // Masking is optional: report the failure and continue unmasked.
            spin.fail(err instanceof Error ? err.message : String(err));
            masker = undefined;
        }
    }
    const stats = { ok: 0, preOnly: 0, skipped: 0, errors: 0, total: 0 };
    const discovery = cfg.tags.freeForm ? new TagDiscovery() : undefined;
    try {
        process.stdout.write(chalk.cyan(`🚀 Scanning ${root}\n`));
        process.stdout.write(` Provider: ${cfg.llm.provider} (${cfg.llm.model})`);
        if (cfg.mask.enabled && masker) {
            process.stdout.write(chalk.gray(' [masked]'));
        }
        if (opts.dryRun) {
            process.stdout.write(chalk.yellow(' [dry-run]'));
        }
        if (cfg.tags.freeForm) {
            process.stdout.write(chalk.magenta(' [free-form]'));
        }
        process.stdout.write('\n');
        if (cfg.scan.excludeFolders.length) {
            process.stdout.write(chalk.gray(` Excluded: ${cfg.scan.excludeFolders.join(', ')}\n`));
        }
        process.stdout.write('\n');
        let allFiles = await walkFiles(root, cfg);
        process.stdout.write(`📁 Files: ${allFiles.length}\n`);
        if (opts.skipTagged) {
            const before = allFiles.length;
            const filtered = [];
            for (const f of allFiles) {
                try {
                    const { stdout: md } = await execFileAsync('mdls', ['-name', 'kMDItemUserTags', '-raw', f], { timeout: 3_000 });
                    if (!md.includes(cfg.tags.autoTag)) {
                        filtered.push(f);
                    }
                }
                catch {
                    // Tags could not be read: keep the file so it still gets processed.
                    filtered.push(f);
                }
            }
            allFiles = filtered;
            process.stdout.write(chalk.gray(` Skip-tagged: ${before - allFiles.length} pominięte, ${allFiles.length} do przetworzenia\n`));
        }
        if (opts.limit && opts.limit > 0 && allFiles.length > opts.limit) {
            allFiles = allFiles.slice(0, opts.limit);
            process.stdout.write(chalk.gray(` Limit: ${opts.limit} plików\n`));
        }
        let dedup;
        if (cfg.dedup.enabled && allFiles.length > 1) {
            process.stdout.write(chalk.gray(`🔢 Hashing ${allFiles.length} files for dedup…\n`));
            dedup = await findDuplicates(allFiles, cfg);
            process.stdout.write(chalk.gray(` Hashed: ${dedup.hashedFiles}, skipped >${cfg.dedup.maxFileSizeMB}MB: ${dedup.skippedLarge}, ` +
                `duplicate groups: ${dedup.totalGroups}, files in groups: ${dedup.totalDuplicates}\n`));
        }
        process.stdout.write('\n');
        stats.total = allFiles.length;
        const skipExt = new Set(cfg.scan.skipExtensions);
        const ocrExt = new Set(cfg.scan.ocrExtensions);
        const videoExt = new Set(cfg.scan.videoExtensions);
        for (const filePath of allFiles) {
            const rel = path.relative(root, filePath);
            const name = path.basename(filePath);
            const ext = path.extname(filePath).toLowerCase();
            if (skipExt.has(ext)) {
                stats.skipped++;
                continue;
            }
            process.stdout.write(chalk.bold(`🔍 ${rel}\n`));
            let ocrText = '';
            if (ocrExt.has(ext)) {
                process.stdout.write(' 📖 OCR…');
                try {
                    ocrText = await extractOcrText(filePath, cfg);
                }
                catch (err) {
                    // One unreadable file must not abort the whole run; nothing is
                    // written for it so it can be retried later.
                    const msg = err instanceof Error ? err.message : String(err);
                    process.stdout.write(chalk.red(`\n ❌ OCR failed: ${msg}\n\n`));
                    stats.errors++;
                    continue;
                }
                const words = ocrText.split(/\s+/).filter(Boolean).length;
                process.stdout.write(` ${words} words\n`);
            }
            else if (videoExt.has(ext)) {
                process.stdout.write(' 🎬 Video\n');
            }
            else {
                process.stdout.write(` 📄 ${ext}\n`);
            }
            const preTagsBase = preTagFromPath(filePath, ocrText, cfg);
            const dupGroup = dedup?.groupByFile.get(filePath);
            const preTags = dupGroup ? mergeTags(cfg, preTagsBase, ['#Duplikat']) : preTagsBase;
            if (dupGroup) {
                const others = dupGroup.files.filter(f => f !== filePath).map(f => path.basename(f));
                process.stdout.write(chalk.magenta(` 🧬 Duplicate of: ${others.join(', ')}\n`));
            }
            let finalTags;
            let finalComment;
            if (preTags.length >= 4 && !ocrText.trim()) {
                // Enough evidence from name/path and nothing to read: skip the LLM.
                finalTags = mergeTags(cfg, preTags, [cfg.tags.autoTag]);
                finalComment = `Auto z nazwy/ścieżki: ${name}.`;
                process.stdout.write(chalk.gray(` ⚡ Pre-only: ${preTags.join(' ')}\n`));
                stats.preOnly++;
            }
            else {
                process.stdout.write(chalk.gray(` 🧠 ${cfg.llm.provider}…\n`));
                const PER_FILE_TIMEOUT_MS = 180_000;
                let timer;
                const timeout = new Promise((_, reject) => {
                    timer = setTimeout(() => reject(new Error(`per-file timeout after ${PER_FILE_TIMEOUT_MS}ms`)), PER_FILE_TIMEOUT_MS);
                });
                try {
                    const result = await Promise.race([
                        inferTagsAndComment({ fileName: name, ext, preTags, ocrText }, cfg, masker, discovery),
                        timeout,
                    ]);
                    finalTags = mergeTags(cfg, result.tags, [cfg.tags.autoTag]);
                    finalComment = result.comment || `Plik: ${name}.`;
                }
                catch (err) {
                    const msg = err instanceof Error ? err.message : String(err);
                    process.stdout.write(chalk.yellow(` ⏱ ${msg} — fallback\n`));
                    finalTags = mergeTags(cfg, preTags, [cfg.tags.autoTag]).slice(0, 6);
                    finalComment = `Plik: ${name}.`;
                }
                finally {
                    // Always clear the timer so it cannot keep the process alive.
                    if (timer) {
                        clearTimeout(timer);
                    }
                }
            }
            if (opts.dryRun) {
                process.stdout.write(chalk.green(` ✅ ${finalTags.join(' ')}\n`));
                process.stdout.write(chalk.gray(` 📝 ${finalComment}\n\n`));
                stats.ok++;
                continue;
            }
            try {
                await writeFileMetadata(filePath, finalTags, finalComment);
                process.stdout.write(chalk.green(` ✅ ${finalTags.join(' ')}\n`));
                process.stdout.write(chalk.gray(` 📝 ${finalComment}\n\n`));
                stats.ok++;
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                process.stdout.write(chalk.red(` ❌ Write failed: ${msg}\n\n`));
                stats.errors++;
            }
        }
    }
    finally {
        // Release the masker connection on success and on failure alike.
        if (masker) {
            await masker.close();
        }
    }
    process.stdout.write('═══════════════════════════════════════════════════════\n');
    process.stdout.write(chalk.bold('✨ Done\n'));
    process.stdout.write(chalk.green(` ✅ Success: ${stats.ok}\n`));
    process.stdout.write(chalk.gray(` ⚡ Pre-only: ${stats.preOnly}\n`));
    process.stdout.write(chalk.gray(` ⏭ Skipped: ${stats.skipped}\n`));
    process.stdout.write(chalk.red(` ❌ Errors: ${stats.errors}\n`));
    if (discovery && discovery.size > 0) {
        process.stdout.write('\n' + chalk.magenta(`🆕 Free-form: LLM zaproponował ${discovery.size} nowych tagów:\n`));
        for (const { tag, count } of discovery.entries().slice(0, 20)) {
            process.stdout.write(chalk.gray(` ${tag} (${count}×)\n`));
        }
        process.stdout.write(chalk.gray('\n Dodaj je do tags.allowed w configu jeśli chcesz je zachować.\n'));
    }
}
package/dist/config.js CHANGED
@@ -3,12 +3,22 @@ import { existsSync } from 'node:fs';
3
3
  import path from 'node:path';
4
4
  import os from 'node:os';
5
5
  import { z } from 'zod';
6
- import { DEFAULT_CONFIG } from './defaults.js';
6
+ import { DEFAULT_CONFIG, DEFAULT_ORGANIZE } from './defaults.js';
7
7
  const PathRuleSchema = z.object({
8
8
  pattern: z.string(),
9
9
  flags: z.string().optional(),
10
10
  tags: z.array(z.string()),
11
11
  });
// Field validators for the `organize` config section. Every field carries its
// own default, so a partially filled section is completed field by field.
const organizeFields = {
    enabled: z.boolean().default(false),
    target: z.string().default('~/Documents/Sorted'),
    strategy: z.enum(['flat', 'nested', 'custom']).default('flat'),
    priority: z.array(z.string()).default([]),
    folderMap: z.record(z.string(), z.string()).default({}),
    unsorted: z.enum(['keep', 'move', 'skip']).default('move'),
    unsortedFolder: z.string().default('_unsorted'),
    multiTag: z.enum(['primary']).default('primary'),
};
// Schema for the whole section; DEFAULT_ORGANIZE is the section-level default.
const OrganizeSchema = z.object(organizeFields).default(DEFAULT_ORGANIZE);
12
22
  const ConfigSchema = z.object({
13
23
  scan: z.object({
14
24
  folder: z.string(),
@@ -46,7 +56,9 @@ const ConfigSchema = z.object({
46
56
  strictEvidence: z.record(z.string(), z.array(z.string())),
47
57
  pathRules: z.array(PathRuleSchema),
48
58
  autoTag: z.string(),
59
+ freeForm: z.boolean().default(false),
49
60
  }),
61
+ organize: OrganizeSchema,
50
62
  context: z.string(),
51
63
  });
52
64
  export const DEFAULT_CONFIG_PATH = path.join(process.env.XDG_CONFIG_HOME ?? path.join(os.homedir(), '.config'), 'sortai', 'config.json');
@@ -56,8 +68,14 @@ export function expandHome(p) {
56
68
  }
57
69
  return p;
58
70
  }
/**
 * Resolve the config file location.
 *
 * @param {string} [customPath] - Optional user-supplied path; `~` is expanded
 *   through `expandHome` and the result is made absolute.
 * @returns {string} The resolved custom path, or `DEFAULT_CONFIG_PATH` when
 *   no (or an empty) custom path is given.
 */
export function resolveConfigPath(customPath) {
    if (!customPath) {
        return DEFAULT_CONFIG_PATH;
    }
    const expanded = expandHome(customPath);
    return path.resolve(expanded);
}
/**
 * Tell whether a config file is present at the resolved location.
 *
 * @param {string} [customPath] - Optional custom config path.
 * @returns {Promise<boolean>} `true` when the file exists.
 */
export async function configExists(customPath) {
    const cfgPath = resolveConfigPath(customPath);
    return existsSync(cfgPath);
}
59
77
  export async function loadConfig(customPath) {
60
- const cfgPath = customPath ? path.resolve(expandHome(customPath)) : DEFAULT_CONFIG_PATH;
78
+ const cfgPath = resolveConfigPath(customPath);
61
79
  if (!existsSync(cfgPath)) {
62
80
  await fs.mkdir(path.dirname(cfgPath), { recursive: true });
63
81
  await fs.writeFile(cfgPath, JSON.stringify(DEFAULT_CONFIG, null, 2), 'utf8');
@@ -68,3 +86,13 @@ export async function loadConfig(customPath) {
68
86
  const config = ConfigSchema.parse(parsed);
69
87
  return { config, path: cfgPath, created: false };
70
88
  }
/**
 * Write a configuration object to disk as pretty-printed JSON.
 *
 * The parent directory is created when missing. If a config file is already
 * present, it is first copied next to itself as `<path>.bak.<timestamp>`.
 *
 * @param {object} config - Configuration to serialize.
 * @param {string} [customPath] - Optional custom config path.
 * @returns {Promise<string>} The path the config was written to.
 */
export async function saveConfig(config, customPath) {
    const target = resolveConfigPath(customPath);
    const parentDir = path.dirname(target);
    await fs.mkdir(parentDir, { recursive: true });

    const hasPrevious = existsSync(target);
    if (hasPrevious) {
        // Keep the previous version; the timestamp makes each backup name unique.
        await fs.copyFile(target, `${target}.bak.${Date.now()}`);
    }

    const serialized = JSON.stringify(config, null, 2);
    await fs.writeFile(target, serialized, 'utf8');
    return target;
}
package/dist/defaults.js CHANGED
@@ -74,6 +74,22 @@ export const DEFAULT_CONTEXT = 'EDIT ME in ~/.config/sortai/config.json. 1-2 sen
74
74
  'used by the LLM as background context to prefer the right tags. ' +
75
75
  'Example: "Self-employed graphic designer in Warsaw. Recurring clients: AcmeCorp, BetaInc. ' +
76
76
  'Documents in PL and EN. Active: tax filings 2024, AcmeCorp branding project."';
// Default values for the `organize` config section.
export const DEFAULT_ORGANIZE = {
    enabled: false,
    target: '~/Documents/Sorted',
    strategy: 'flat',
    // Tag priority list, written as rows for readability and flattened into
    // a single ordered array.
    priority: [
        ['#Faktura', '#FakturaProforma', '#Wyciag', '#Bank', '#KartaKredytowa', '#Kredyt', '#Podatki'],
        ['#Umowa', '#Wniosek', '#Reklamacja', '#Skarga', '#Decyzja', '#Oswiadczenie', '#Ugoda'],
        ['#Protokol', '#Regulamin', '#Harmonogram', '#Oferta', '#Pismo', '#Korespondencja'],
        ['#CV', '#Kariera', '#Nieruchomosc', '#Zdrowie', '#RODO'],
        ['#Email', '#Zalacznik', '#Skan', '#Screenshot', '#Foto', '#Grafika', '#Nagranie'],
    ].flat(),
    folderMap: {},
    unsorted: 'move',
    unsortedFolder: '_unsorted',
    multiTag: 'primary',
};
77
93
  export const DEFAULT_CONFIG = {
78
94
  scan: {
79
95
  folder: '~/Desktop',
@@ -110,7 +126,9 @@ export const DEFAULT_CONFIG = {
110
126
  strictEvidence: DEFAULT_STRICT_EVIDENCE,
111
127
  pathRules: DEFAULT_PATH_RULES,
112
128
  autoTag: '#AI_Sorted',
129
+ freeForm: false,
113
130
  },
131
+ organize: DEFAULT_ORGANIZE,
114
132
  context: DEFAULT_CONTEXT,
115
133
  };
116
134
  export const BAD_COMMENT_PHRASES = [
package/dist/llm/index.js CHANGED
@@ -1,6 +1,6 @@
1
- import { mergeTags, normalizeTag, isStrictTag, strictTagHasEvidence } from '../tags.js';
1
+ import { mergeTags, normalizeTag, isStrictTag, strictTagHasEvidence, TAG_SHAPE } from '../tags.js';
2
2
  import { BAD_COMMENT_PHRASES } from '../defaults.js';
3
- import { buildPrompt, parseJsonSafe } from './prompt.js';
3
+ import { buildPrompt, buildTaxonomyPrompt, parseJsonSafe } from './prompt.js';
4
4
  import { callOllama } from './local.js';
5
5
  import { callAnthropic, callOpenAi } from './cloud.js';
6
6
  async function dispatchProvider(prompt, cfg) {
@@ -14,7 +14,7 @@ async function dispatchProvider(prompt, cfg) {
14
14
  return callOllama(prompt, cfg);
15
15
  }
16
16
  }
17
- export async function inferTagsAndComment(req, cfg, masker) {
17
+ export async function inferTagsAndComment(req, cfg, masker, discovery) {
18
18
  const fallback = {
19
19
  tags: mergeTags(cfg, req.preTags).slice(0, 6),
20
20
  comment: `Plik: ${req.fileName}.`,
@@ -48,8 +48,9 @@ export async function inferTagsAndComment(req, cfg, masker) {
48
48
  const rawTags = Array.isArray(data.tags) ? data.tags : [];
49
49
  const evidence = (req.fileName + ' ' + req.ocrText).toLowerCase();
50
50
  const cleaned = [];
51
+ const allowedSet = new Set([...cfg.tags.allowed, cfg.tags.autoTag]);
51
52
  for (const t of rawTags) {
52
- const n = normalizeTag(t, cfg);
53
+ const n = normalizeTag(t, cfg, cfg.tags.freeForm);
53
54
  if (!n)
54
55
  continue;
55
56
  if (isStrictTag(n, cfg)) {
@@ -59,6 +60,9 @@ export async function inferTagsAndComment(req, cfg, masker) {
59
60
  else {
60
61
  cleaned.push(n);
61
62
  }
63
+ if (cfg.tags.freeForm && !allowedSet.has(n)) {
64
+ discovery?.record(n);
65
+ }
62
66
  }
63
67
  applyContextualGuards(cleaned, req, cfg, evidence);
64
68
  const strictFound = cleaned.filter(t => isStrictTag(t, cfg));
@@ -85,6 +89,45 @@ export async function inferTagsAndComment(req, cfg, masker) {
85
89
  comment: comment.slice(0, 500),
86
90
  };
87
91
  }
/**
 * Turn a raw value into a tag string, or `null` when it is unusable.
 * The value must be a non-blank string; it is trimmed, prefixed with `#` when
 * the prefix is missing, and must match `TAG_SHAPE`.
 */
function normalizeTaxonomyTag(value) {
    if (typeof value !== 'string') {
        return null;
    }
    const trimmed = value.trim();
    if (!trimmed) {
        return null;
    }
    const tag = trimmed.startsWith('#') ? trimmed : `#${trimmed}`;
    return TAG_SHAPE.test(tag) ? tag : null;
}
/** Keep only the string entries of `value`; anything that is not an array yields `[]`. */
function stringItems(value) {
    return Array.isArray(value) ? value.filter((x) => typeof x === 'string') : [];
}
/**
 * Ask the configured LLM provider to propose a tag taxonomy for a set of
 * sample files and return the validated result.
 *
 * Categories whose name is missing, blank or does not match `TAG_SHAPE` are
 * dropped. Aliases get the same treatment as names (trimmed, `#`-prefixed,
 * shape-checked) and are de-duplicated, so blank or malformed aliases never
 * reach the caller.
 *
 * @param {Array<object>} samples - Sample files passed to `buildTaxonomyPrompt`.
 * @param {string[]} langs - Detected languages passed to the prompt builder.
 * @param {string} userContext - Free-text user context for the prompt.
 * @param {object} cfg - Configuration; `cfg.llm` is forwarded to the provider.
 * @param {string} [hint] - Optional extra hint for the prompt.
 * @returns {Promise<{categories: Array<object>, summary: string}>}
 */
export async function inferTaxonomy(samples, langs, userContext, cfg, hint) {
    const prompt = buildTaxonomyPrompt(samples, langs, userContext, hint);
    // The taxonomy is a large JSON document (8-15 categories with aliases,
    // evidence and examples) - the default 300 tokens cut the output in half,
    // so at least 2000 are requested. A missing or non-numeric setting must not
    // turn the limit into NaN.
    const MIN_TAXONOMY_TOKENS = 2000;
    const configuredTokens = Number.isFinite(cfg.llm.numPredict) ? cfg.llm.numPredict : 0;
    const taxCfg = { ...cfg, llm: { ...cfg.llm, numPredict: Math.max(configuredTokens, MIN_TAXONOMY_TOKENS) } };
    const raw = await dispatchProvider(prompt, taxCfg);
    const data = parseJsonSafe(raw) ?? {};
    const categories = Array.isArray(data.categories) ? data.categories : [];
    const parsed = [];
    for (const c of categories) {
        if (!c || typeof c !== 'object') {
            continue;
        }
        const name = normalizeTaxonomyTag(c.name);
        if (!name) {
            continue;
        }
        const aliases = [];
        for (const rawAlias of stringItems(c.aliases)) {
            const alias = normalizeTaxonomyTag(rawAlias);
            if (alias && !aliases.includes(alias)) {
                aliases.push(alias);
            }
        }
        parsed.push({
            name,
            description: typeof c.description === 'string' ? c.description : '',
            aliases,
            strictEvidence: stringItems(c.strict_evidence),
            isStrict: Boolean(c.is_strict),
            examples: stringItems(c.examples),
        });
    }
    return {
        categories: parsed,
        summary: typeof data.summary === 'string' ? data.summary : '',
    };
}
88
131
  function isBadComment(c) {
89
132
  if (!c)
90
133
  return true;
@@ -0,0 +1,35 @@
/**
 * Check whether an Ollama server answers at `url` and list its models.
 *
 * Sends `GET <url>/api/tags` and aborts the request after `timeoutMs`.
 * Never throws: every failure is reported through the returned object.
 *
 * Only well-formed model entries (string `name`, numeric `size`, string
 * `modified_at`) are returned; malformed entries, or a payload without a
 * `models` array, yield fewer (or zero) models instead of marking a server
 * that did answer as unreachable.
 *
 * @param {string} url - Base URL of the server; trailing slashes are ignored.
 * @param {number} [timeoutMs=2000] - Request timeout in milliseconds.
 * @returns {Promise<{reachable: boolean, models: Array<{name: string, size: number, modified: string}>, error?: string}>}
 */
export async function probeOllama(url, timeoutMs = 2_000) {
    const base = url.replace(/\/+$/, '');
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
        const res = await fetch(`${base}/api/tags`, { signal: controller.signal });
        if (!res.ok) {
            return { reachable: false, models: [], error: `HTTP ${res.status}` };
        }
        const data = await res.json();
        const rawModels = Array.isArray(data?.models) ? data.models : [];
        const models = rawModels
            .filter((m) => m !== null
                && typeof m === 'object'
                && typeof m.name === 'string'
                && typeof m.size === 'number'
                && typeof m.modified_at === 'string')
            .map((m) => ({ name: m.name, size: m.size, modified: m.modified_at }));
        return { reachable: true, models };
    }
    catch (err) {
        // The only abort source is our own timer, so an aborted signal means timeout.
        if (controller.signal.aborted) {
            return { reachable: false, models: [], error: `timeout after ${timeoutMs}ms` };
        }
        const msg = err instanceof Error ? err.message : String(err);
        return { reachable: false, models: [], error: msg };
    }
    finally {
        clearTimeout(timer);
    }
}
/**
 * Format a byte count as a short size label.
 *
 * Sizes that round to at least 1024 MB are shown in GB with one decimal,
 * everything else in whole MB. The unit is chosen after rounding, so a size
 * just under 1 GiB reads "1.0 GB" rather than "1024 MB".
 *
 * @param {number} bytes - Size in bytes.
 * @returns {string} Label such as `"4.1 GB"` or `"512 MB"`.
 */
export function modelSizeLabel(bytes) {
    const BYTES_PER_MB = 1024 * 1024;
    const MB_PER_GB = 1024;
    const mb = bytes / BYTES_PER_MB;
    const wholeMb = mb.toFixed(0);
    if (Number(wholeMb) >= MB_PER_GB) {
        return `${(mb / MB_PER_GB).toFixed(1)} GB`;
    }
    return `${wholeMb} MB`;
}
// Suggested Ollama models as { name, note } records; the note is a short
// user-facing hint shown next to the model name.
export const SUGGESTED_OLLAMA_MODELS = [
    ['mistral-nemo', '12B — dobry baseline, szybkie'],
    ['qwen2.5:14b', '14B — lepsza taksonomia'],
    ['llama3.1', '8B — najszybsze, słabsze taksonomie'],
].map(([name, note]) => ({ name, note }));
@@ -54,6 +54,37 @@ export function buildPrompt(req, cfg, ocrTextForPrompt) {
54
54
  '3. BĄDŹ SCEPTYCZNY. Lepiej dać 1 tag (#Grafika) niż 5 błędnych.',
55
55
  ].join('\n');
56
56
  }
/**
 * Build the LLM prompt that asks for a tag taxonomy based on sample files.
 *
 * Each sample contributes its name, extension and the first 600 characters of
 * its OCR text (or a placeholder when there is no text). The prompt sections
 * are separated by blank lines; the hint line is included only when a hint is
 * given.
 *
 * @param {Array<{name: string, ext: string, ocrText?: string}>} samples - Sample files.
 * @param {string[]} langs - Detected languages; an empty list is shown as "nieznany".
 * @param {string} userContext - User context; empty is shown as "(brak)".
 * @param {string} [hint] - Optional extra hint.
 * @returns {string} The complete prompt text.
 */
export function buildTaxonomyPrompt(samples, langs, userContext, hint) {
    const samplesBlock = samples
        .map((s, i) => {
            // A sample without OCR text is treated the same as one with empty text.
            const trimmed = (s.ocrText ?? '').slice(0, 600).trim() || '(brak tekstu OCR – plik graficzny/wideo)';
            return `--- Plik ${i + 1} ---\nnazwa: ${s.name}\nrozszerzenie: ${s.ext}\nOCR:\n${trimmed}`;
        })
        .join('\n\n');
    const langLabel = langs.length === 0 ? 'nieznany' : langs.join(' + ');
    // The optional hint is spliced in conditionally instead of filtering falsy
    // entries afterwards: filtering would also remove the '' separators that
    // produce the blank lines between sections.
    const hintLines = hint ? [`Dodatkowa wskazówka: ${hint}`] : [];
    return [
        'Jesteś asystentem budującym taksonomię tagów Findera dla prywatnej kolekcji plików.',
        'Poniżej znajdziesz próbki plików (nazwa + fragment OCR). Zaproponuj 8-15 ZRÓŻNICOWANYCH kategorii.',
        '',
        `Wykryte języki: ${langLabel}`,
        `Kontekst użytkownika: ${userContext || '(brak)'}`,
        ...hintLines,
        '',
        'Dla każdej kategorii podaj:',
        '- name: nazwa tagu w formacie #PascalCase (jedno słowo, bez spacji, np. #Faktura)',
        '- description: jedno zdanie po polsku',
        '- aliases: tablica nazw w innych wykrytych językach (np. ["#Invoice"])',
        '- strict_evidence: 3-5 słów-dowodów które MUSZĄ pojawić się w OCR/nazwie żeby tag był nałożony (dla kategorii wrażliwych: finansowych, prawnych, zdrowotnych)',
        '- is_strict: true tylko dla wrażliwych kategorii (Bank, Faktura, RODO, Zdrowie, Podatki, KartaKredytowa, Kredyt)',
        '- examples: tablica nazw plików z próbki które pasują do tej kategorii',
        '',
        'Zwróć WYŁĄCZNIE JSON w formacie:',
        '{"categories": [{"name":"#X","description":"...","aliases":["#Y"],"strict_evidence":["..."],"is_strict":false,"examples":["..."]}], "summary":"jedno zdanie podsumowania"}',
        '',
        'PRÓBKI:',
        samplesBlock,
    ].join('\n');
}
57
88
  export function parseJsonSafe(raw) {
58
89
  const cleaned = raw.trim();
59
90
  try {