@woladi/sortai 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -14
- package/dist/cli.js +59 -284
- package/dist/commands/clear.js +55 -0
- package/dist/commands/init.js +32 -0
- package/dist/commands/organize.js +92 -0
- package/dist/commands/sample.js +69 -0
- package/dist/commands/tag.js +244 -0
- package/dist/config.js +30 -2
- package/dist/defaults.js +18 -0
- package/dist/llm/index.js +47 -4
- package/dist/llm/ollama-detect.js +35 -0
- package/dist/llm/prompt.js +31 -0
- package/dist/mask.js +27 -12
- package/dist/organize/move.js +20 -0
- package/dist/organize/plan.js +84 -0
- package/dist/organize/read-tags.js +18 -0
- package/dist/tags.js +32 -3
- package/dist/wizard/index.js +359 -0
- package/dist/wizard/languages.js +34 -0
- package/dist/wizard/refine.js +195 -0
- package/dist/wizard/sample.js +41 -0
- package/dist/wizard/taxonomy.js +57 -0
- package/package.json +8 -2
package/README.md
CHANGED
|
@@ -6,6 +6,9 @@
|
|
|
6
6
|
|
|
7
7
|
> macOS CLI that scans a folder, reads every document with **Apple Vision OCR**, and automatically writes **Finder tags** and **Finder comments** — so your files become searchable in Spotlight and browsable by tag in Finder. Runs fully offline by default. Cloud LLMs optional.
|
|
8
8
|
|
|
9
|
+
> [!NOTE]
|
|
10
|
+
> **Pre-1.0, work in progress.** CLI messages and wizard prompts are currently hardcoded in Polish — DX preference of the maintainer. The README, config keys, and source code are in English. There's no language toggle yet; if you'd like an English UI, open an issue.
|
|
11
|
+
|
|
9
12
|
## What it does
|
|
10
13
|
|
|
11
14
|
`sortai` walks a folder recursively, reads the content of PDFs and images using Apple's on-device Vision framework (via [`macos-vision`](https://www.npmjs.com/package/macos-vision)), and uses a language model to infer what the file is about. It then writes that understanding directly into the file's macOS metadata:
|
|
@@ -95,51 +98,73 @@ When `--mask` is set, `sortai` spawns [`pseudonym-mcp`](https://www.npmjs.com/pa
|
|
|
95
98
|
## Quick start
|
|
96
99
|
|
|
97
100
|
```bash
|
|
98
|
-
# First run
|
|
101
|
+
# First run with no config launches an interactive wizard:
|
|
102
|
+
# - asks what mode you want (tag / organize / both / discovery)
|
|
103
|
+
# - asks for Ollama vs Anthropic vs OpenAI, picks model
|
|
104
|
+
# - samples ~30 files, runs OCR, asks the LLM to propose a taxonomy
|
|
105
|
+
# - lets you refine tags, then writes ~/.config/sortai/config.json
|
|
99
106
|
npx @woladi/sortai
|
|
100
107
|
|
|
108
|
+
# Or invoke the wizard explicitly
|
|
109
|
+
npx @woladi/sortai init ~/Desktop
|
|
110
|
+
|
|
101
111
|
# Dry-run: see what tags would be written, without touching any files
|
|
102
|
-
npx @woladi/sortai ~/Desktop --dry-run
|
|
112
|
+
npx @woladi/sortai tag ~/Desktop --dry-run
|
|
103
113
|
|
|
104
114
|
# Actually write Finder tags and comments
|
|
105
|
-
npx @woladi/sortai ~/Desktop
|
|
115
|
+
npx @woladi/sortai tag ~/Desktop
|
|
116
|
+
|
|
117
|
+
# Move files into folders based on Finder tags already on them
|
|
118
|
+
npx @woladi/sortai organize ~/Desktop --apply
|
|
119
|
+
|
|
120
|
+
# Try the pipeline on 10 random files without writing anything
|
|
121
|
+
npx @woladi/sortai sample ~/Desktop -n 10
|
|
106
122
|
```
|
|
107
123
|
|
|
108
|
-
> The first invocation
|
|
124
|
+
> The first invocation without a config opens the interactive wizard. You can re-open it any time with `sortai init` to regenerate the taxonomy.
|
|
125
|
+
|
|
126
|
+
### Commands
|
|
127
|
+
|
|
128
|
+
| Command | What it does |
|
|
129
|
+
|---------|--------------|
|
|
130
|
+
| `sortai init [folder]` | Interactive wizard — picks mode/provider/model, samples files, generates and refines a tag taxonomy, writes the config. |
|
|
131
|
+
| `sortai tag [folder]` | Default. OCR + LLM → Finder tags & comments. Same flags as before. |
|
|
132
|
+
| `sortai organize [folder]` | Read existing Finder tags, move files into folders. Default dry-run; pass `--apply` to execute. |
|
|
133
|
+
| `sortai clear [folder]` | Remove all sortai tags & comments from files. |
|
|
134
|
+
| `sortai sample [folder]` | Dry-run the pipeline on N random files (default 20). Useful after editing the config. |
|
|
109
135
|
|
|
110
136
|
### Reset metadata before a fresh run
|
|
111
137
|
|
|
112
138
|
```bash
|
|
113
139
|
# Remove all Finder tags and comments sortai previously wrote
|
|
114
|
-
npx @woladi/sortai ~/Desktop --clear
|
|
140
|
+
npx @woladi/sortai clear ~/Desktop
|
|
115
141
|
|
|
116
142
|
# Preview what would be cleared without touching files
|
|
117
|
-
npx @woladi/sortai ~/Desktop --clear --dry-run
|
|
143
|
+
npx @woladi/sortai clear ~/Desktop --dry-run
|
|
118
144
|
```
|
|
119
145
|
|
|
120
|
-
After
|
|
146
|
+
After `sortai clear`, Spotlight is reindexed automatically (`mdimport`) so stale tags disappear from search immediately. Combine with a config change and re-run to start fresh with a new taxonomy.
|
|
121
147
|
|
|
122
148
|
### Cloud mode (optional)
|
|
123
149
|
|
|
124
150
|
```bash
|
|
125
151
|
# Anthropic Claude — OCR text sent to the API
|
|
126
|
-
npx @woladi/sortai ~/Desktop --cloud anthropic --api-key sk-ant-...
|
|
152
|
+
npx @woladi/sortai tag ~/Desktop --cloud anthropic --api-key sk-ant-...
|
|
127
153
|
|
|
128
154
|
# With PII pseudonymisation: only tokens like [PESEL:1] reach the cloud
|
|
129
|
-
npx @woladi/sortai ~/Desktop --cloud anthropic --mask --api-key sk-ant-...
|
|
155
|
+
npx @woladi/sortai tag ~/Desktop --cloud anthropic --mask --api-key sk-ant-...
|
|
130
156
|
|
|
131
157
|
# OpenAI
|
|
132
|
-
OPENAI_API_KEY=sk-... npx @woladi/sortai ~/Desktop --cloud openai
|
|
158
|
+
OPENAI_API_KEY=sk-... npx @woladi/sortai tag ~/Desktop --cloud openai
|
|
133
159
|
```
|
|
134
160
|
|
|
135
|
-
## CLI flags
|
|
161
|
+
## CLI flags (for `tag`, the default subcommand)
|
|
136
162
|
|
|
137
163
|
| Flag | Default | Description |
|
|
138
164
|
|------|---------|-------------|
|
|
139
165
|
| `<folder>` | from config | Folder to scan recursively |
|
|
140
166
|
| `--config <path>` | `~/.config/sortai/config.json` | Alternative config file |
|
|
141
167
|
| `--dry-run` | off | Print results without writing tags/comments |
|
|
142
|
-
| `--clear` | off | Remove all sortai-written Finder tags and comments from every file in the folder |
|
|
143
168
|
| `--model <name>` | `mistral-nemo` (Ollama) | LLM model name |
|
|
144
169
|
| `--ollama-url <url>` | `http://localhost:11434` | Ollama server |
|
|
145
170
|
| `--cloud anthropic\|openai` | — | Switch to a cloud LLM |
|
|
@@ -150,11 +175,14 @@ OPENAI_API_KEY=sk-... npx @woladi/sortai ~/Desktop --cloud openai
|
|
|
150
175
|
| `--limit <n>` | — | Process at most N files |
|
|
151
176
|
| `--skip-tagged` | off | Skip files that already carry `cfg.tags.autoTag` (`#AI_Sorted`) |
|
|
152
177
|
| `--no-dedup` | off | Skip SHA256 duplicate detection |
|
|
178
|
+
| `--free` | off | Let the LLM invent new tags outside `tags.allowed`; new tags are reported at the end |
|
|
153
179
|
| `--verbose` | off | Extra logs |
|
|
154
180
|
|
|
181
|
+
For `organize`: `--target <path>` overrides destination, `--apply` is required to actually move (default is dry-run). For `sample`: `-n <count>` selects how many files to test.
|
|
182
|
+
|
|
155
183
|
## Configuration
|
|
156
184
|
|
|
157
|
-
The first run writes `~/.config/sortai/config.json`. Edit it to fit your taxonomy.
|
|
185
|
+
The first run launches `sortai init`, which writes `~/.config/sortai/config.json` after you answer the wizard. You can also edit it by hand. Layout:
|
|
158
186
|
|
|
159
187
|
```json
|
|
160
188
|
{
|
|
@@ -187,7 +215,18 @@ The first run writes `~/.config/sortai/config.json`. Edit it to fit your taxonom
|
|
|
187
215
|
{ "pattern": "\\bbank\\b|iban|rachunek", "flags": "i", "tags": ["#Bank"] },
|
|
188
216
|
{ "pattern": "faktura|invoice", "flags": "i", "tags": ["#Faktura"] }
|
|
189
217
|
],
|
|
190
|
-
"autoTag": "#AI_Sorted"
|
|
218
|
+
"autoTag": "#AI_Sorted",
|
|
219
|
+
"freeForm": false
|
|
220
|
+
},
|
|
221
|
+
"organize": {
|
|
222
|
+
"enabled": false,
|
|
223
|
+
"target": "~/Documents/Sorted",
|
|
224
|
+
"strategy": "flat",
|
|
225
|
+
"priority": ["#Faktura", "#Bank", "#Umowa"],
|
|
226
|
+
"folderMap": {},
|
|
227
|
+
"unsorted": "move",
|
|
228
|
+
"unsortedFolder": "_unsorted",
|
|
229
|
+
"multiTag": "primary"
|
|
191
230
|
},
|
|
192
231
|
"context": "1-2 sentence description of yourself and ongoing matters — used by the LLM as background."
|
|
193
232
|
}
|
|
@@ -199,6 +238,7 @@ Key options:
|
|
|
199
238
|
- **`tags.strict`** — subset of `allowed`. A strict tag only lands on a file if at least one `strictEvidence` keyword appears verbatim in OCR or filename. Prevents false positives on sensitive categories like `#Bank` or `#Kredyt`.
|
|
200
239
|
- **`tags.autoTag`** — appended to every successfully processed file. Used as a sentinel by `--skip-tagged` so you don't re-process files on the next run.
|
|
201
240
|
- **`tags.pathRules`** — regex rules matched against the full filepath + OCR text. Matched tags become *pre-tags* that are always included and passed to the LLM as hints.
|
|
241
|
+
- **`tags.freeForm`** — when `true`, the LLM may propose tags outside `allowed`; new tags pass a shape check (`#[Unicode-letter/digit/_-]+`, so `#Płatność` works) and are reported in the run summary so you can promote them into `allowed` if you want to keep them. Equivalent to the `--free` CLI flag.
|
|
202
242
|
- **`ocr.startPage` / `ocr.maxPages`** — PDF page range. Default reads pages 1–2; raise `maxPages` for long documents where the key content is deeper.
|
|
203
243
|
- **`context`** — one or two sentences about yourself pinned to the LLM system prompt. The model uses this as background when writing comments (e.g. knowing you're a freelancer or a specific sector helps contextualise ambiguous documents).
|
|
204
244
|
|
package/dist/cli.js
CHANGED
|
@@ -1,296 +1,71 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { Command } from 'commander';
|
|
3
3
|
import chalk from 'chalk';
|
|
4
|
-
import
|
|
5
|
-
import
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
import {
|
|
9
|
-
const execFileAsync = promisify(execFile);
|
|
10
|
-
import { expandHome, loadConfig } from './config.js';
|
|
11
|
-
import { walkFiles } from './walker.js';
|
|
12
|
-
import { extractOcrText } from './ocr.js';
|
|
13
|
-
import { preTagFromPath } from './pretag.js';
|
|
14
|
-
import { mergeTags } from './tags.js';
|
|
15
|
-
import { writeFileMetadata, clearMacosMetadata } from './macos.js';
|
|
16
|
-
import { Masker } from './mask.js';
|
|
17
|
-
import { inferTagsAndComment } from './llm/index.js';
|
|
18
|
-
import { findDuplicates } from './dedup.js';
|
|
19
|
-
function applyOverrides(cfg, opts) {
|
|
20
|
-
const apiKey = opts.apiKey
|
|
21
|
-
?? process.env.SORTAI_API_KEY
|
|
22
|
-
?? (opts.cloud === 'anthropic' ? process.env.ANTHROPIC_API_KEY : undefined)
|
|
23
|
-
?? (opts.cloud === 'openai' ? process.env.OPENAI_API_KEY : undefined);
|
|
24
|
-
const provider = opts.cloud ?? 'ollama';
|
|
25
|
-
const defaultCloudModels = {
|
|
26
|
-
anthropic: 'claude-sonnet-4-6',
|
|
27
|
-
openai: 'gpt-4o-mini',
|
|
28
|
-
};
|
|
29
|
-
return {
|
|
30
|
-
...cfg,
|
|
31
|
-
scan: {
|
|
32
|
-
...cfg.scan,
|
|
33
|
-
excludeFolders: opts.exclude
|
|
34
|
-
? opts.exclude.split(',').map(s => s.trim()).filter(Boolean)
|
|
35
|
-
: cfg.scan.excludeFolders,
|
|
36
|
-
},
|
|
37
|
-
llm: {
|
|
38
|
-
...cfg.llm,
|
|
39
|
-
provider,
|
|
40
|
-
model: opts.model ?? (opts.cloud ? defaultCloudModels[opts.cloud] ?? cfg.llm.model : cfg.llm.model),
|
|
41
|
-
ollamaUrl: opts.ollamaUrl ?? cfg.llm.ollamaUrl,
|
|
42
|
-
apiKey,
|
|
43
|
-
},
|
|
44
|
-
mask: {
|
|
45
|
-
...cfg.mask,
|
|
46
|
-
enabled: opts.mask,
|
|
47
|
-
lang: opts.lang ?? cfg.mask.lang,
|
|
48
|
-
},
|
|
49
|
-
dedup: {
|
|
50
|
-
...cfg.dedup,
|
|
51
|
-
enabled: opts.dedup,
|
|
52
|
-
},
|
|
53
|
-
};
|
|
54
|
-
}
|
|
4
|
+
import { initCommand } from './commands/init.js';
|
|
5
|
+
import { tagCommand } from './commands/tag.js';
|
|
6
|
+
import { organizeCommand } from './commands/organize.js';
|
|
7
|
+
import { clearCommand } from './commands/clear.js';
|
|
8
|
+
import { sampleCommand } from './commands/sample.js';
|
|
55
9
|
async function main() {
|
|
56
10
|
const program = new Command();
|
|
57
11
|
program
|
|
58
12
|
.name('sortai')
|
|
59
|
-
.description('macOS CLI
|
|
60
|
-
.version('0.
|
|
61
|
-
|
|
62
|
-
.
|
|
63
|
-
.
|
|
64
|
-
.option('--
|
|
65
|
-
.option('--
|
|
66
|
-
.
|
|
67
|
-
|
|
68
|
-
.
|
|
69
|
-
.
|
|
70
|
-
.option('--
|
|
71
|
-
.option('--
|
|
72
|
-
.option('--
|
|
73
|
-
.option('--
|
|
74
|
-
.option('--
|
|
75
|
-
.option('--
|
|
76
|
-
.
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
process.exit(1);
|
|
110
|
-
}
|
|
111
|
-
let masker;
|
|
112
|
-
if (cfg.mask.enabled && cfg.llm.provider !== 'ollama') {
|
|
113
|
-
masker = new Masker(cfg);
|
|
114
|
-
const spin = ora('Starting pseudonym-mcp…').start();
|
|
115
|
-
try {
|
|
116
|
-
await masker.connect();
|
|
117
|
-
spin.succeed('pseudonym-mcp ready');
|
|
118
|
-
}
|
|
119
|
-
catch (err) {
|
|
120
|
-
spin.fail(err instanceof Error ? err.message : String(err));
|
|
121
|
-
masker = undefined;
|
|
122
|
-
}
|
|
123
|
-
}
|
|
124
|
-
if (opts.clear) {
|
|
125
|
-
process.stdout.write(chalk.cyan(`🧹 Clearing sortai metadata from ${root}\n`));
|
|
126
|
-
if (opts.dryRun)
|
|
127
|
-
process.stdout.write(chalk.yellow(' [dry-run — no changes will be written]\n'));
|
|
128
|
-
process.stdout.write('\n');
|
|
129
|
-
const clearFiles = await walkFiles(root, cfg);
|
|
130
|
-
let cleared = 0;
|
|
131
|
-
let clearErrors = 0;
|
|
132
|
-
for (const filePath of clearFiles) {
|
|
133
|
-
const rel = path.relative(root, filePath);
|
|
134
|
-
if (opts.dryRun) {
|
|
135
|
-
process.stdout.write(chalk.gray(` 🗑 ${rel}\n`));
|
|
136
|
-
cleared++;
|
|
137
|
-
continue;
|
|
138
|
-
}
|
|
139
|
-
try {
|
|
140
|
-
await clearMacosMetadata(filePath);
|
|
141
|
-
execFileAsync('mdimport', [filePath]).catch(() => { });
|
|
142
|
-
if (opts.verbose)
|
|
143
|
-
process.stdout.write(chalk.gray(` 🗑 ${rel}\n`));
|
|
144
|
-
cleared++;
|
|
145
|
-
}
|
|
146
|
-
catch {
|
|
147
|
-
process.stdout.write(chalk.red(` ❌ ${rel}\n`));
|
|
148
|
-
clearErrors++;
|
|
149
|
-
}
|
|
150
|
-
}
|
|
151
|
-
process.stdout.write('═══════════════════════════════════════════════════════\n');
|
|
152
|
-
process.stdout.write(chalk.bold('✨ Done\n'));
|
|
153
|
-
process.stdout.write(chalk.green(` 🗑 Cleared: ${cleared}\n`));
|
|
154
|
-
if (clearErrors)
|
|
155
|
-
process.stdout.write(chalk.red(` ❌ Errors: ${clearErrors}\n`));
|
|
156
|
-
return;
|
|
157
|
-
}
|
|
158
|
-
process.stdout.write(chalk.cyan(`🚀 Scanning ${root}\n`));
|
|
159
|
-
process.stdout.write(` Provider: ${cfg.llm.provider} (${cfg.llm.model})`);
|
|
160
|
-
if (cfg.mask.enabled && masker)
|
|
161
|
-
process.stdout.write(chalk.gray(' [masked]'));
|
|
162
|
-
if (opts.dryRun)
|
|
163
|
-
process.stdout.write(chalk.yellow(' [dry-run]'));
|
|
164
|
-
process.stdout.write('\n');
|
|
165
|
-
if (cfg.scan.excludeFolders.length) {
|
|
166
|
-
process.stdout.write(chalk.gray(` Excluded: ${cfg.scan.excludeFolders.join(', ')}\n`));
|
|
167
|
-
}
|
|
168
|
-
process.stdout.write('\n');
|
|
169
|
-
let allFiles = await walkFiles(root, cfg);
|
|
170
|
-
process.stdout.write(`📁 Files: ${allFiles.length}\n`);
|
|
171
|
-
if (opts.skipTagged) {
|
|
172
|
-
const before = allFiles.length;
|
|
173
|
-
const filtered = [];
|
|
174
|
-
for (const f of allFiles) {
|
|
175
|
-
try {
|
|
176
|
-
const { stdout: md } = await execFileAsync('mdls', ['-name', 'kMDItemUserTags', '-raw', f], { timeout: 3_000 });
|
|
177
|
-
if (!md.includes(cfg.tags.autoTag))
|
|
178
|
-
filtered.push(f);
|
|
179
|
-
}
|
|
180
|
-
catch {
|
|
181
|
-
filtered.push(f);
|
|
182
|
-
}
|
|
183
|
-
}
|
|
184
|
-
allFiles = filtered;
|
|
185
|
-
process.stdout.write(chalk.gray(` Skip-tagged: ${before - allFiles.length} pominięte, ${allFiles.length} do przetworzenia\n`));
|
|
186
|
-
}
|
|
187
|
-
if (opts.limit && opts.limit > 0 && allFiles.length > opts.limit) {
|
|
188
|
-
allFiles = allFiles.slice(0, opts.limit);
|
|
189
|
-
process.stdout.write(chalk.gray(` Limit: ${opts.limit} plików\n`));
|
|
190
|
-
}
|
|
191
|
-
let dedup;
|
|
192
|
-
if (cfg.dedup.enabled && allFiles.length > 1) {
|
|
193
|
-
process.stdout.write(chalk.gray(`🔢 Hashing ${allFiles.length} files for dedup…\n`));
|
|
194
|
-
dedup = await findDuplicates(allFiles, cfg);
|
|
195
|
-
process.stdout.write(chalk.gray(` Hashed: ${dedup.hashedFiles}, skipped >${cfg.dedup.maxFileSizeMB}MB: ${dedup.skippedLarge}, ` +
|
|
196
|
-
`duplicate groups: ${dedup.totalGroups}, files in groups: ${dedup.totalDuplicates}\n`));
|
|
197
|
-
}
|
|
198
|
-
process.stdout.write('\n');
|
|
199
|
-
const stats = { ok: 0, preOnly: 0, skipped: 0, errors: 0, total: allFiles.length };
|
|
200
|
-
const skipExt = new Set(cfg.scan.skipExtensions);
|
|
201
|
-
const ocrExt = new Set(cfg.scan.ocrExtensions);
|
|
202
|
-
const videoExt = new Set(cfg.scan.videoExtensions);
|
|
203
|
-
for (const filePath of allFiles) {
|
|
204
|
-
const rel = path.relative(root, filePath);
|
|
205
|
-
const name = path.basename(filePath);
|
|
206
|
-
const ext = path.extname(filePath).toLowerCase();
|
|
207
|
-
if (skipExt.has(ext)) {
|
|
208
|
-
stats.skipped++;
|
|
209
|
-
continue;
|
|
210
|
-
}
|
|
211
|
-
process.stdout.write(chalk.bold(`🔍 ${rel}\n`));
|
|
212
|
-
let ocrText = '';
|
|
213
|
-
if (ocrExt.has(ext)) {
|
|
214
|
-
process.stdout.write(' 📖 OCR…');
|
|
215
|
-
ocrText = await extractOcrText(filePath, cfg);
|
|
216
|
-
const words = ocrText.split(/\s+/).filter(Boolean).length;
|
|
217
|
-
process.stdout.write(` ${words} words\n`);
|
|
218
|
-
}
|
|
219
|
-
else if (videoExt.has(ext)) {
|
|
220
|
-
process.stdout.write(' 🎬 Video\n');
|
|
221
|
-
}
|
|
222
|
-
else {
|
|
223
|
-
process.stdout.write(` 📄 ${ext}\n`);
|
|
224
|
-
}
|
|
225
|
-
const preTagsBase = preTagFromPath(filePath, ocrText, cfg);
|
|
226
|
-
const dupGroup = dedup?.groupByFile.get(filePath);
|
|
227
|
-
const preTags = dupGroup ? mergeTags(cfg, preTagsBase, ['#Duplikat']) : preTagsBase;
|
|
228
|
-
if (dupGroup) {
|
|
229
|
-
const others = dupGroup.files.filter(f => f !== filePath).map(f => path.basename(f));
|
|
230
|
-
process.stdout.write(chalk.magenta(` 🧬 Duplicate of: ${others.join(', ')}\n`));
|
|
231
|
-
}
|
|
232
|
-
let finalTags;
|
|
233
|
-
let finalComment;
|
|
234
|
-
if (preTags.length >= 4 && !ocrText.trim()) {
|
|
235
|
-
finalTags = mergeTags(cfg, preTags, [cfg.tags.autoTag]);
|
|
236
|
-
finalComment = `Auto z nazwy/ścieżki: ${name}.`;
|
|
237
|
-
process.stdout.write(chalk.gray(` ⚡ Pre-only: ${preTags.join(' ')}\n`));
|
|
238
|
-
stats.preOnly++;
|
|
239
|
-
}
|
|
240
|
-
else {
|
|
241
|
-
process.stdout.write(chalk.gray(` 🧠 ${cfg.llm.provider}…\n`));
|
|
242
|
-
const PER_FILE_TIMEOUT_MS = 180_000;
|
|
243
|
-
let timer;
|
|
244
|
-
const timeout = new Promise((_, reject) => {
|
|
245
|
-
timer = setTimeout(() => reject(new Error(`per-file timeout after ${PER_FILE_TIMEOUT_MS}ms`)), PER_FILE_TIMEOUT_MS);
|
|
246
|
-
});
|
|
247
|
-
try {
|
|
248
|
-
const result = await Promise.race([
|
|
249
|
-
inferTagsAndComment({ fileName: name, ext, preTags, ocrText }, cfg, masker),
|
|
250
|
-
timeout,
|
|
251
|
-
]);
|
|
252
|
-
finalTags = mergeTags(cfg, result.tags, [cfg.tags.autoTag]);
|
|
253
|
-
finalComment = result.comment || `Plik: ${name}.`;
|
|
254
|
-
}
|
|
255
|
-
catch (err) {
|
|
256
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
257
|
-
process.stdout.write(chalk.yellow(` ⏱ ${msg} — fallback\n`));
|
|
258
|
-
finalTags = mergeTags(cfg, preTags, [cfg.tags.autoTag]).slice(0, 6);
|
|
259
|
-
finalComment = `Plik: ${name}.`;
|
|
260
|
-
}
|
|
261
|
-
finally {
|
|
262
|
-
if (timer)
|
|
263
|
-
clearTimeout(timer);
|
|
264
|
-
}
|
|
265
|
-
}
|
|
266
|
-
if (opts.dryRun) {
|
|
267
|
-
process.stdout.write(chalk.green(` ✅ ${finalTags.join(' ')}\n`));
|
|
268
|
-
process.stdout.write(chalk.gray(` 📝 ${finalComment}\n\n`));
|
|
269
|
-
stats.ok++;
|
|
270
|
-
continue;
|
|
271
|
-
}
|
|
272
|
-
try {
|
|
273
|
-
await writeFileMetadata(filePath, finalTags, finalComment);
|
|
274
|
-
process.stdout.write(chalk.green(` ✅ ${finalTags.join(' ')}\n`));
|
|
275
|
-
process.stdout.write(chalk.gray(` 📝 ${finalComment}\n\n`));
|
|
276
|
-
stats.ok++;
|
|
277
|
-
}
|
|
278
|
-
catch (err) {
|
|
279
|
-
const msg = err instanceof Error ? err.message : String(err);
|
|
280
|
-
process.stdout.write(chalk.red(` ❌ Write failed: ${msg}\n\n`));
|
|
281
|
-
stats.errors++;
|
|
282
|
-
}
|
|
283
|
-
}
|
|
284
|
-
if (masker)
|
|
285
|
-
await masker.close();
|
|
286
|
-
process.stdout.write('═══════════════════════════════════════════════════════\n');
|
|
287
|
-
process.stdout.write(chalk.bold('✨ Done\n'));
|
|
288
|
-
process.stdout.write(chalk.green(` ✅ Success: ${stats.ok}\n`));
|
|
289
|
-
process.stdout.write(chalk.gray(` ⚡ Pre-only: ${stats.preOnly}\n`));
|
|
290
|
-
process.stdout.write(chalk.gray(` ⏭ Skipped: ${stats.skipped}\n`));
|
|
291
|
-
process.stdout.write(chalk.red(` ❌ Errors: ${stats.errors}\n`));
|
|
13
|
+
.description('macOS CLI: OCR + LLM → Finder tags, komentarze i sortowanie do folderów')
|
|
14
|
+
.version('0.2.0');
|
|
15
|
+
program
|
|
16
|
+
.command('init [folder]')
|
|
17
|
+
.description('Interaktywny wizard: zbuduj config z Twoich plików')
|
|
18
|
+
.option('--config <path>', 'ścieżka do configu')
|
|
19
|
+
.option('--api-key <key>', 'klucz API (cloud)')
|
|
20
|
+
.action((folder, opts) => initCommand(folder, opts));
|
|
21
|
+
program
|
|
22
|
+
.command('tag [folder]', { isDefault: true })
|
|
23
|
+
.description('Otaguj pliki (Finder tagi + komentarze) — akcja domyślna')
|
|
24
|
+
.option('--config <path>', 'ścieżka do configu')
|
|
25
|
+
.option('--dry-run', 'podgląd bez zapisu', false)
|
|
26
|
+
.option('--model <name>', 'nazwa modelu LLM')
|
|
27
|
+
.option('--ollama-url <url>', 'Ollama base URL')
|
|
28
|
+
.option('--cloud <provider>', "'anthropic' | 'openai'")
|
|
29
|
+
.option('--api-key <key>', 'klucz API')
|
|
30
|
+
.option('--mask', 'pseudonimizuj OCR przed wysyłką do cloud', false)
|
|
31
|
+
.option('--lang <code>', "'en' | 'pl'")
|
|
32
|
+
.option('--exclude <patterns>', 'CSV — katalogi do pominięcia')
|
|
33
|
+
.option('--limit <n>', 'max plików', v => parseInt(v, 10))
|
|
34
|
+
.option('--skip-tagged', 'pomiń pliki z auto-tagiem', false)
|
|
35
|
+
.option('--no-dedup', 'pomiń detekcję duplikatów')
|
|
36
|
+
.option('--free', 'pozwól LLM-owi proponować nowe tagi (free-form)', false)
|
|
37
|
+
.option('--verbose', 'więcej logów', false)
|
|
38
|
+
.action((folder, opts) => tagCommand(folder, opts));
|
|
39
|
+
program
|
|
40
|
+
.command('organize [folder]')
|
|
41
|
+
.description('Przenieś pliki do folderów na bazie ich Finder tagów')
|
|
42
|
+
.option('--config <path>', 'ścieżka do configu')
|
|
43
|
+
.option('--target <path>', 'folder docelowy (nadpisuje config)')
|
|
44
|
+
.option('--dry-run', 'pokaż plan, nie przenoś', false)
|
|
45
|
+
.option('--apply', 'wykonaj przenoszenia (default = dry-run)', false)
|
|
46
|
+
.option('--verbose', 'więcej logów', false)
|
|
47
|
+
.action((folder, opts) => organizeCommand(folder, opts));
|
|
48
|
+
program
|
|
49
|
+
.command('clear [folder]')
|
|
50
|
+
.description('Wyczyść wszystkie sortai tagi i komentarze z plików')
|
|
51
|
+
.option('--config <path>', 'ścieżka do configu')
|
|
52
|
+
.option('--dry-run', 'podgląd bez kasowania', false)
|
|
53
|
+
.option('--verbose', 'więcej logów', false)
|
|
54
|
+
.action((folder, opts) => clearCommand(folder, opts));
|
|
55
|
+
program
|
|
56
|
+
.command('sample [folder]')
|
|
57
|
+
.description('Uruchom pełen pipeline na N losowych plikach (dry-run)')
|
|
58
|
+
.option('--config <path>', 'ścieżka do configu')
|
|
59
|
+
.option('-n, --count <count>', 'ile plików', v => parseInt(v, 10), 20)
|
|
60
|
+
.option('--verbose', 'więcej logów', false)
|
|
61
|
+
.action((folder, opts) => sampleCommand(folder, opts));
|
|
62
|
+
await program.parseAsync(process.argv);
|
|
292
63
|
}
|
|
293
64
|
// Top-level rejection handler for the CLI entry point.
// A prompt that was cancelled (error named 'ExitPromptError', or a message
// containing 'force closed') is reported as a cancellation with exit code 130;
// every other rejection is printed to stderr as fatal with exit code 1.
main().catch((err) => {
    const isError = err instanceof Error;
    const promptCancelled = isError
        && (err.name === 'ExitPromptError' || err.message.includes('force closed'));
    if (promptCancelled) {
        process.stdout.write(chalk.yellow('\nAnulowano.\n'));
        process.exit(130);
    }
    const reason = isError ? err.message : String(err);
    process.stderr.write(chalk.red(`Fatal: ${reason}\n`));
    process.exit(1);
});
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import path from 'node:path';
|
|
2
|
+
import { existsSync } from 'node:fs';
|
|
3
|
+
import { execFile } from 'node:child_process';
|
|
4
|
+
import { promisify } from 'node:util';
|
|
5
|
+
import chalk from 'chalk';
|
|
6
|
+
import { expandHome, loadConfig, configExists } from '../config.js';
|
|
7
|
+
import { walkFiles } from '../walker.js';
|
|
8
|
+
import { clearMacosMetadata } from '../macos.js';
|
|
9
|
+
import { runWizard } from '../wizard/index.js';
|
|
10
|
+
const execFileAsync = promisify(execFile);
|
|
11
|
+
/**
 * Handler for `sortai clear [folder]`.
 *
 * When no config file exists, the interactive wizard is started instead and
 * the function returns without clearing anything. Otherwise every file
 * returned by `walkFiles` is passed to `clearMacosMetadata`, and a summary
 * with the number of cleared files and errors is printed.
 *
 * @param {string|undefined} folder Folder given on the command line; falls
 *   back to `cfg.scan.folder` when omitted.
 * @param {{config?: string, dryRun?: boolean, verbose?: boolean}} opts
 *   Parsed CLI options (only these three are read here).
 * @returns {Promise<void>}
 */
export async function clearCommand(folder, opts) {
    if (!(await configExists(opts.config))) {
        process.stdout.write(chalk.cyan('🪄 Brak configu — uruchamiam wizard…\n\n'));
        await runWizard({ configPath: opts.config, folderHint: folder });
        return;
    }
    const { config: cfg } = await loadConfig(opts.config);
    const rawFolder = folder ?? cfg.scan.folder;
    const root = path.resolve(expandHome(rawFolder));
    if (!existsSync(root)) {
        process.stderr.write(chalk.red(`Folder nie istnieje: ${root}\n`));
        process.exit(1);
    }
    process.stdout.write(chalk.cyan(`🧹 Clearing sortai metadata from ${root}\n`));
    if (opts.dryRun)
        process.stdout.write(chalk.yellow('   [dry-run — no changes will be written]\n'));
    process.stdout.write('\n');
    const files = await walkFiles(root, cfg);
    let cleared = 0;
    let errors = 0;
    for (const filePath of files) {
        const rel = path.relative(root, filePath);
        if (opts.dryRun) {
            // Dry-run only lists the file; it is still counted so the summary
            // shows how many files a real run would touch.
            process.stdout.write(chalk.gray(`  🗑  ${rel}\n`));
            cleared++;
            continue;
        }
        try {
            await clearMacosMetadata(filePath);
            // Best-effort Spotlight reindex: intentionally not awaited, and a
            // failing `mdimport` must not turn a successful clear into an error.
            execFileAsync('mdimport', [filePath]).catch(() => { });
            if (opts.verbose)
                process.stdout.write(chalk.gray(`  🗑  ${rel}\n`));
            cleared++;
        }
        catch (err) {
            // Surface the reason; previously the error was discarded and only
            // the path was printed, which made failures impossible to diagnose.
            const msg = err instanceof Error ? err.message : String(err);
            process.stdout.write(chalk.red(`  ❌ ${rel}: ${msg}\n`));
            errors++;
        }
    }
    process.stdout.write('═══════════════════════════════════════════════════════\n');
    process.stdout.write(chalk.bold('✨ Done\n'));
    process.stdout.write(chalk.green(`   🗑  Cleared: ${cleared}\n`));
    if (errors)
        process.stdout.write(chalk.red(`   ❌ Errors:  ${errors}\n`));
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import chalk from 'chalk';
|
|
2
|
+
import { runWizard } from '../wizard/index.js';
|
|
3
|
+
import { tagCommand } from './tag.js';
|
|
4
|
+
import { organizeCommand } from './organize.js';
|
|
5
|
+
/**
 * Handler for `sortai init [folder]`.
 *
 * Runs the interactive wizard and then, depending on the flags in the
 * wizard's result, chains into `tagCommand` and/or `organizeCommand` for the
 * folder stored in the resulting config.
 *
 * @param {string|undefined} folder Passed to the wizard as `folderHint`.
 * @param {{config?: string, apiKey?: string}} opts Parsed CLI options.
 * @returns {Promise<void>}
 */
export async function initCommand(folder, opts) {
    const wizardInput = {
        configPath: opts.config,
        apiKey: opts.apiKey,
        folderHint: folder,
    };
    const outcome = await runWizard(wizardInput);

    // NOTE(review): opts.apiKey is handed to the wizard only and is not
    // forwarded to tagCommand below — confirm the key is resolved elsewhere
    // (config or environment) for cloud providers.
    if (outcome.shouldRunTag) {
        process.stdout.write('\n' + chalk.bold.cyan('▶ Tagowanie\n\n'));
        const { scan, dedup, mask, tags } = outcome.config;
        const tagOptions = {
            config: opts.config,
            dryRun: false,
            verbose: false,
            skipTagged: false,
            dedup: dedup.enabled,
            mask: mask.enabled,
            free: tags.freeForm,
        };
        await tagCommand(scan.folder, tagOptions);
    }

    if (outcome.shouldRunOrganize) {
        process.stdout.write('\n' + chalk.bold.cyan('▶ Sortowanie\n\n'));
        // Chained from the wizard, so moves are applied for real (apply: true).
        const organizeOptions = {
            config: opts.config,
            apply: true,
            dryRun: false,
            verbose: false,
        };
        await organizeCommand(outcome.config.scan.folder, organizeOptions);
    }
}
|