@aaronsb/kg-cli 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -0
- package/dist/api/client.d.ts +867 -0
- package/dist/api/client.d.ts.map +1 -0
- package/dist/api/client.js +1362 -0
- package/dist/api/client.js.map +1 -0
- package/dist/cli/admin/backup.d.ts +9 -0
- package/dist/cli/admin/backup.d.ts.map +1 -0
- package/dist/cli/admin/backup.js +363 -0
- package/dist/cli/admin/backup.js.map +1 -0
- package/dist/cli/admin/index.d.ts +7 -0
- package/dist/cli/admin/index.d.ts.map +1 -0
- package/dist/cli/admin/index.js +52 -0
- package/dist/cli/admin/index.js.map +1 -0
- package/dist/cli/admin/scheduler.d.ts +7 -0
- package/dist/cli/admin/scheduler.d.ts.map +1 -0
- package/dist/cli/admin/scheduler.js +125 -0
- package/dist/cli/admin/scheduler.js.map +1 -0
- package/dist/cli/admin/status.d.ts +7 -0
- package/dist/cli/admin/status.d.ts.map +1 -0
- package/dist/cli/admin/status.js +134 -0
- package/dist/cli/admin/status.js.map +1 -0
- package/dist/cli/admin/utils.d.ts +34 -0
- package/dist/cli/admin/utils.d.ts.map +1 -0
- package/dist/cli/admin/utils.js +441 -0
- package/dist/cli/admin/utils.js.map +1 -0
- package/dist/cli/ai-config/embedding.d.ts +11 -0
- package/dist/cli/ai-config/embedding.d.ts.map +1 -0
- package/dist/cli/ai-config/embedding.js +598 -0
- package/dist/cli/ai-config/embedding.js.map +1 -0
- package/dist/cli/ai-config/extraction.d.ts +11 -0
- package/dist/cli/ai-config/extraction.d.ts.map +1 -0
- package/dist/cli/ai-config/extraction.js +206 -0
- package/dist/cli/ai-config/extraction.js.map +1 -0
- package/dist/cli/ai-config/index.d.ts +21 -0
- package/dist/cli/ai-config/index.d.ts.map +1 -0
- package/dist/cli/ai-config/index.js +27 -0
- package/dist/cli/ai-config/index.js.map +1 -0
- package/dist/cli/ai-config/keys.d.ts +11 -0
- package/dist/cli/ai-config/keys.d.ts.map +1 -0
- package/dist/cli/ai-config/keys.js +182 -0
- package/dist/cli/ai-config/keys.js.map +1 -0
- package/dist/cli/ai-config/utils.d.ts +13 -0
- package/dist/cli/ai-config/utils.d.ts.map +1 -0
- package/dist/cli/ai-config/utils.js +84 -0
- package/dist/cli/ai-config/utils.js.map +1 -0
- package/dist/cli/artifact.d.ts +8 -0
- package/dist/cli/artifact.d.ts.map +1 -0
- package/dist/cli/artifact.js +296 -0
- package/dist/cli/artifact.js.map +1 -0
- package/dist/cli/auth-admin.d.ts +11 -0
- package/dist/cli/auth-admin.d.ts.map +1 -0
- package/dist/cli/auth-admin.js +415 -0
- package/dist/cli/auth-admin.js.map +1 -0
- package/dist/cli/colors.d.ts +105 -0
- package/dist/cli/colors.d.ts.map +1 -0
- package/dist/cli/colors.js +164 -0
- package/dist/cli/colors.js.map +1 -0
- package/dist/cli/commands.d.ts +6 -0
- package/dist/cli/commands.d.ts.map +1 -0
- package/dist/cli/commands.js +164 -0
- package/dist/cli/commands.js.map +1 -0
- package/dist/cli/config.d.ts +6 -0
- package/dist/cli/config.d.ts.map +1 -0
- package/dist/cli/config.js +694 -0
- package/dist/cli/config.js.map +1 -0
- package/dist/cli/curve-viz.d.ts +89 -0
- package/dist/cli/curve-viz.d.ts.map +1 -0
- package/dist/cli/curve-viz.js +228 -0
- package/dist/cli/curve-viz.js.map +1 -0
- package/dist/cli/database.d.ts +6 -0
- package/dist/cli/database.d.ts.map +1 -0
- package/dist/cli/database.js +324 -0
- package/dist/cli/database.js.map +1 -0
- package/dist/cli/document.d.ts +6 -0
- package/dist/cli/document.d.ts.map +1 -0
- package/dist/cli/document.js +458 -0
- package/dist/cli/document.js.map +1 -0
- package/dist/cli/group.d.ts +8 -0
- package/dist/cli/group.d.ts.map +1 -0
- package/dist/cli/group.js +174 -0
- package/dist/cli/group.js.map +1 -0
- package/dist/cli/health.d.ts +6 -0
- package/dist/cli/health.d.ts.map +1 -0
- package/dist/cli/health.js +34 -0
- package/dist/cli/health.js.map +1 -0
- package/dist/cli/help-formatter.d.ts +16 -0
- package/dist/cli/help-formatter.d.ts.map +1 -0
- package/dist/cli/help-formatter.js +248 -0
- package/dist/cli/help-formatter.js.map +1 -0
- package/dist/cli/help.d.ts +9 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +227 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/ingest.d.ts +6 -0
- package/dist/cli/ingest.d.ts.map +1 -0
- package/dist/cli/ingest.js +722 -0
- package/dist/cli/ingest.js.map +1 -0
- package/dist/cli/jobs.d.ts +6 -0
- package/dist/cli/jobs.d.ts.map +1 -0
- package/dist/cli/jobs.js +663 -0
- package/dist/cli/jobs.js.map +1 -0
- package/dist/cli/login.d.ts +21 -0
- package/dist/cli/login.d.ts.map +1 -0
- package/dist/cli/login.js +221 -0
- package/dist/cli/login.js.map +1 -0
- package/dist/cli/logout.d.ts +16 -0
- package/dist/cli/logout.d.ts.map +1 -0
- package/dist/cli/logout.js +141 -0
- package/dist/cli/logout.js.map +1 -0
- package/dist/cli/mcp-config.d.ts +10 -0
- package/dist/cli/mcp-config.d.ts.map +1 -0
- package/dist/cli/mcp-config.js +358 -0
- package/dist/cli/mcp-config.js.map +1 -0
- package/dist/cli/oauth.d.ts +15 -0
- package/dist/cli/oauth.d.ts.map +1 -0
- package/dist/cli/oauth.js +296 -0
- package/dist/cli/oauth.js.map +1 -0
- package/dist/cli/ontology.d.ts +6 -0
- package/dist/cli/ontology.d.ts.map +1 -0
- package/dist/cli/ontology.js +231 -0
- package/dist/cli/ontology.js.map +1 -0
- package/dist/cli/polarity.d.ts +6 -0
- package/dist/cli/polarity.d.ts.map +1 -0
- package/dist/cli/polarity.js +295 -0
- package/dist/cli/polarity.js.map +1 -0
- package/dist/cli/projection.d.ts +8 -0
- package/dist/cli/projection.d.ts.map +1 -0
- package/dist/cli/projection.js +297 -0
- package/dist/cli/projection.js.map +1 -0
- package/dist/cli/query-def.d.ts +8 -0
- package/dist/cli/query-def.d.ts.map +1 -0
- package/dist/cli/query-def.js +163 -0
- package/dist/cli/query-def.js.map +1 -0
- package/dist/cli/rbac.d.ts +12 -0
- package/dist/cli/rbac.d.ts.map +1 -0
- package/dist/cli/rbac.js +615 -0
- package/dist/cli/rbac.js.map +1 -0
- package/dist/cli/search.d.ts +6 -0
- package/dist/cli/search.d.ts.map +1 -0
- package/dist/cli/search.js +829 -0
- package/dist/cli/search.js.map +1 -0
- package/dist/cli/source.d.ts +6 -0
- package/dist/cli/source.d.ts.map +1 -0
- package/dist/cli/source.js +202 -0
- package/dist/cli/source.js.map +1 -0
- package/dist/cli/verb-router.d.ts +25 -0
- package/dist/cli/verb-router.d.ts.map +1 -0
- package/dist/cli/verb-router.js +415 -0
- package/dist/cli/verb-router.js.map +1 -0
- package/dist/cli/vocabulary/config.d.ts +7 -0
- package/dist/cli/vocabulary/config.d.ts.map +1 -0
- package/dist/cli/vocabulary/config.js +201 -0
- package/dist/cli/vocabulary/config.js.map +1 -0
- package/dist/cli/vocabulary/consolidate.d.ts +8 -0
- package/dist/cli/vocabulary/consolidate.d.ts.map +1 -0
- package/dist/cli/vocabulary/consolidate.js +192 -0
- package/dist/cli/vocabulary/consolidate.js.map +1 -0
- package/dist/cli/vocabulary/embeddings.d.ts +9 -0
- package/dist/cli/vocabulary/embeddings.d.ts.map +1 -0
- package/dist/cli/vocabulary/embeddings.js +205 -0
- package/dist/cli/vocabulary/embeddings.js.map +1 -0
- package/dist/cli/vocabulary/epistemic.d.ts +7 -0
- package/dist/cli/vocabulary/epistemic.d.ts.map +1 -0
- package/dist/cli/vocabulary/epistemic.js +315 -0
- package/dist/cli/vocabulary/epistemic.js.map +1 -0
- package/dist/cli/vocabulary/index.d.ts +7 -0
- package/dist/cli/vocabulary/index.d.ts.map +1 -0
- package/dist/cli/vocabulary/index.js +45 -0
- package/dist/cli/vocabulary/index.js.map +1 -0
- package/dist/cli/vocabulary/profiles.d.ts +7 -0
- package/dist/cli/vocabulary/profiles.d.ts.map +1 -0
- package/dist/cli/vocabulary/profiles.js +171 -0
- package/dist/cli/vocabulary/profiles.js.map +1 -0
- package/dist/cli/vocabulary/similarity.d.ts +9 -0
- package/dist/cli/vocabulary/similarity.d.ts.map +1 -0
- package/dist/cli/vocabulary/similarity.js +199 -0
- package/dist/cli/vocabulary/similarity.js.map +1 -0
- package/dist/cli/vocabulary/status.d.ts +8 -0
- package/dist/cli/vocabulary/status.d.ts.map +1 -0
- package/dist/cli/vocabulary/status.js +280 -0
- package/dist/cli/vocabulary/status.js.map +1 -0
- package/dist/cli/vocabulary/sync.d.ts +7 -0
- package/dist/cli/vocabulary/sync.d.ts.map +1 -0
- package/dist/cli/vocabulary/sync.js +111 -0
- package/dist/cli/vocabulary/sync.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +16 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/auth/auth-client.d.ts +247 -0
- package/dist/lib/auth/auth-client.d.ts.map +1 -0
- package/dist/lib/auth/auth-client.js +305 -0
- package/dist/lib/auth/auth-client.js.map +1 -0
- package/dist/lib/auth/challenge.d.ts +39 -0
- package/dist/lib/auth/challenge.d.ts.map +1 -0
- package/dist/lib/auth/challenge.js +125 -0
- package/dist/lib/auth/challenge.js.map +1 -0
- package/dist/lib/auth/client-credentials-flow.d.ts +58 -0
- package/dist/lib/auth/client-credentials-flow.d.ts.map +1 -0
- package/dist/lib/auth/client-credentials-flow.js +118 -0
- package/dist/lib/auth/client-credentials-flow.js.map +1 -0
- package/dist/lib/auth/device-flow.d.ts +75 -0
- package/dist/lib/auth/device-flow.d.ts.map +1 -0
- package/dist/lib/auth/device-flow.js +177 -0
- package/dist/lib/auth/device-flow.js.map +1 -0
- package/dist/lib/auth/index.d.ts +14 -0
- package/dist/lib/auth/index.d.ts.map +1 -0
- package/dist/lib/auth/index.js +34 -0
- package/dist/lib/auth/index.js.map +1 -0
- package/dist/lib/auth/oauth-types.d.ts +69 -0
- package/dist/lib/auth/oauth-types.d.ts.map +1 -0
- package/dist/lib/auth/oauth-types.js +10 -0
- package/dist/lib/auth/oauth-types.js.map +1 -0
- package/dist/lib/auth/oauth-utils.d.ts +51 -0
- package/dist/lib/auth/oauth-utils.d.ts.map +1 -0
- package/dist/lib/auth/oauth-utils.js +110 -0
- package/dist/lib/auth/oauth-utils.js.map +1 -0
- package/dist/lib/auth/token-manager.d.ts +87 -0
- package/dist/lib/auth/token-manager.d.ts.map +1 -0
- package/dist/lib/auth/token-manager.js +139 -0
- package/dist/lib/auth/token-manager.js.map +1 -0
- package/dist/lib/auth/token-refresh.d.ts +63 -0
- package/dist/lib/auth/token-refresh.d.ts.map +1 -0
- package/dist/lib/auth/token-refresh.js +141 -0
- package/dist/lib/auth/token-refresh.js.map +1 -0
- package/dist/lib/config.d.ts +286 -0
- package/dist/lib/config.d.ts.map +1 -0
- package/dist/lib/config.js +537 -0
- package/dist/lib/config.js.map +1 -0
- package/dist/lib/job-stream.d.ts +53 -0
- package/dist/lib/job-stream.d.ts.map +1 -0
- package/dist/lib/job-stream.js +153 -0
- package/dist/lib/job-stream.js.map +1 -0
- package/dist/lib/mcp-allowlist.d.ts +101 -0
- package/dist/lib/mcp-allowlist.d.ts.map +1 -0
- package/dist/lib/mcp-allowlist.js +340 -0
- package/dist/lib/mcp-allowlist.js.map +1 -0
- package/dist/lib/table-example.d.ts +7 -0
- package/dist/lib/table-example.d.ts.map +1 -0
- package/dist/lib/table-example.js +105 -0
- package/dist/lib/table-example.js.map +1 -0
- package/dist/lib/table.d.ts +95 -0
- package/dist/lib/table.d.ts.map +1 -0
- package/dist/lib/table.js +263 -0
- package/dist/lib/table.js.map +1 -0
- package/dist/lib/terminal-images.d.ts +66 -0
- package/dist/lib/terminal-images.d.ts.map +1 -0
- package/dist/lib/terminal-images.js +268 -0
- package/dist/lib/terminal-images.js.map +1 -0
- package/dist/mcp/formatters.d.ts +100 -0
- package/dist/mcp/formatters.d.ts.map +1 -0
- package/dist/mcp/formatters.js +1411 -0
- package/dist/mcp/formatters.js.map +1 -0
- package/dist/mcp-server.d.ts +9 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +1810 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/types/index.d.ts +742 -0
- package/dist/types/index.d.ts.map +1 -0
- package/dist/types/index.js +6 -0
- package/dist/types/index.js.map +1 -0
- package/dist/version.d.ts +10 -0
- package/dist/version.d.ts.map +1 -0
- package/dist/version.js +13 -0
- package/dist/version.js.map +1 -0
- package/package.json +84 -0
@@ -0,0 +1,722 @@
"use strict";
/**
 * Ingestion commands
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.ingestCommand = void 0;
const commander_1 = require("commander");
const chalk_1 = __importDefault(require("chalk"));
const ora_1 = __importDefault(require("ora"));
const fs = __importStar(require("fs"));
const nodePath = __importStar(require("path")); // Renamed to avoid shadowing with parameter names
const os = __importStar(require("os")); // ADR-051: Get hostname for provenance
const client_1 = require("../api/client");
const config_1 = require("../lib/config");
const help_formatter_1 = require("./help-formatter");
exports.ingestCommand = (0, help_formatter_1.setCommandHelp)(new commander_1.Command('ingest'), 'Ingest documents into the knowledge graph', 'Ingest documents into the knowledge graph. Processes documents and extracts concepts, relationships, and evidence. Supports three modes: single file (one document), directory (batch ingest multiple files), and raw text (ingest text directly without a file). All operations create jobs (ADR-014) that can be monitored via "kg job" commands. Workflow: submit → chunk (semantic boundaries ~1000 words with overlap) → create job → optional approval → process (LLM extract, embed concepts, match existing, insert graph) → complete.')
    .showHelpAfterError('(add --help for additional information)')
    .showSuggestionAfterError();
// Ingest file command
exports.ingestCommand
    .command('file <path>')
    .description('Ingest a single document file. Reads file, chunks text into semantic segments (~1000 words with overlap), submits job, returns job ID. Optionally waits for completion with -w. Supports text files (.txt, .md, .rst), PDF documents (.pdf), and other API-supported formats. By default: auto-approves (starts immediately), uses serial processing (chunks see previous concepts for clean deduplication, slower but higher quality), detects duplicates (file hash checked, returns existing job if found). Use --force to bypass duplicate detection, --parallel for faster processing of large documents (may create duplicate concepts), --no-approve to require manual approval (ADR-014), -w to wait for completion (polls until complete, shows progress).')
    .requiredOption('-o, --ontology <name>', 'Ontology/collection name (named collection or knowledge domain)')
    .option('-f, --force', 'Force re-ingestion even if duplicate (bypasses hash check, creates new job)', false)
    .option('--no-approve', 'Require manual approval before processing (job enters awaiting_approval state, must approve via "kg job approve <id>"). Default: auto-approve.')
    .option('--parallel', 'Process in parallel (all chunks simultaneously, chunks don\'t see each other, may duplicate concepts, faster). Default: serial (sequential, cleaner deduplication, recommended).', false)
    .option('--filename <name>', 'Override filename for tracking (displayed in ontology files list)')
    .option('--target-words <n>', 'Target words per chunk (actual may vary based on natural boundaries, range 500-2000 typically effective)', '1000')
    .option('--overlap-words <n>', 'Word overlap between chunks (provides context continuity, helps LLM understand cross-chunk relationships)', '200')
    .option('-w, --wait', 'Wait for job completion (polls status, shows progress, returns final results). Default: submit and exit (returns immediately with job ID, monitor via "kg job status <id>").', false)
    .showHelpAfterError()
    .action(async (path, options) => {
    try {
        // Validate file exists
        if (!fs.existsSync(path)) {
            console.error(chalk_1.default.red(`✗ File not found: ${path}`));
            process.exit(1);
        }
        const client = (0, client_1.createClientFromEnv)();
        const config = (0, config_1.getConfig)();
        // Default to auto-approve (options.approve is true by default due to --no-approve pattern)
        // Only require approval if user explicitly passes --no-approve flag
        const autoApprove = options.approve !== false;
        const request = {
            ontology: options.ontology,
            filename: options.filename,
            force: options.force,
            auto_approve: autoApprove, // ADR-014: Auto-approve flag
            processing_mode: options.parallel ? 'parallel' : 'serial',
            options: {
                target_words: parseInt(options.targetWords),
                overlap_words: parseInt(options.overlapWords),
            },
            // ADR-051: Source provenance metadata
            source_type: 'file',
            source_path: nodePath.resolve(path), // Convert to absolute path
            source_hostname: os.hostname(),
        };
        console.log(chalk_1.default.blue('Submitting document for ingestion...'));
        console.log(chalk_1.default.gray(`  File: ${path}`));
        console.log(chalk_1.default.gray(`  Ontology: ${request.ontology}`));
        const result = await client.ingestFile(path, request);
        // Check if duplicate
        if ('duplicate' in result && result.duplicate) {
            const dupResult = result;
            console.log(chalk_1.default.yellow('\n⊘ Duplicate detected'));
            console.log(chalk_1.default.gray(`  Existing job: ${dupResult.existing_job_id}`));
            console.log(chalk_1.default.gray(`  Status: ${dupResult.status}`));
            console.log(chalk_1.default.gray(`\n  ${dupResult.message}`));
            if (dupResult.use_force) {
                console.log(chalk_1.default.gray(`  ${dupResult.use_force}`));
            }
            if (dupResult.result) {
                console.log(chalk_1.default.green('\n✓ Previous ingestion completed:'));
                printJobResult(dupResult.result);
            }
            return;
        }
        // Type narrowed: result is JobSubmitResponse
        const submitResult = result;
        console.log(chalk_1.default.green(`\n✓ Job submitted: ${submitResult.job_id}`));
        // If --wait flag provided, poll for completion
        if (options.wait) {
            await pollJobWithProgress(client, submitResult.job_id);
        }
        else {
            // Default: submit and exit (like walking away from the counter)
            console.log(chalk_1.default.gray(`\nMonitor progress: ${chalk_1.default.cyan(`kg jobs status ${submitResult.job_id} --watch`)}`));
        }
    }
    catch (error) {
        console.error(chalk_1.default.red('\n✗ Ingestion failed'));
        console.error(chalk_1.default.red(error.response?.data?.detail || error.message));
        process.exit(1);
    }
});
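// ---------------------------------------------------------------------------
// Editorial note (illustrative sketch, not part of the published package):
// the `--no-approve` option above relies on Commander's negated-flag
// convention, where `--no-<name>` defines `options.<name>` with a default of
// true, set to false only when the flag is actually passed:
//
//   const { Command } = require('commander');
//   const demo = new Command('demo').option('--no-approve', 'require approval');
//   demo.parse(['node', 'demo', '--no-approve'], { from: 'node' });
//   console.log(demo.opts().approve); // false (true when the flag is omitted)
//
// That is why the handler derives `autoApprove` from
// `options.approve !== false` rather than reading a dedicated --approve flag.
// ---------------------------------------------------------------------------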
// Ingest directory command
exports.ingestCommand
    .command('directory <dir>')
    .description('Ingest all matching files from a directory (batch processing). Scans directory for files matching patterns (default: text *.md *.txt, images *.png *.jpg *.jpeg *.gif *.webp), optionally recurses into subdirectories (-r with depth limit), groups files by ontology (single ontology via -o OR auto-create from subdirectory names via --directories-as-ontologies), and submits batch jobs. Auto-detects file type: images use vision pipeline (ADR-057), text files use standard extraction. Use --dry-run to preview what would be ingested without submitting (checks duplicates, shows skip/submit counts). Directory-as-ontology mode: each subdirectory becomes separate ontology named after directory, useful for organizing knowledge domains by folder structure. Examples: "physics/" → "physics" ontology, "chemistry/organic/" → "organic" ontology.')
    .option('-o, --ontology <name>', 'Ontology/collection name (required unless --directories-as-ontologies). Single ontology receives all files.')
    .option('-p, --pattern <patterns...>', 'File patterns to match (glob patterns). Text and image extensions supported.', ['*.md', '*.txt', '*.png', '*.jpg', '*.jpeg', '*.gif', '*.webp', '*.bmp'])
    .option('-r, --recurse', 'Enable recursive scanning of subdirectories. MUST combine with --depth. Examples: "--recurse --depth 1" (one level), "--recurse --depth 2" (two levels), "--recurse --depth all" (unlimited). Default depth is 0 (current dir only).', false)
    .option('-d, --depth <n>', 'Maximum recursion depth (use with --recurse). 0=current dir only (default), 1=one level deep, 2=two levels, "all"=unlimited depth. WITHOUT --recurse, only current directory is scanned.', '0')
    .option('--directories-as-ontologies', 'Use directory names as ontology names (auto-creates ontologies from folder structure, cannot be combined with -o)', false)
    .option('-f, --force', 'Force re-ingestion even if duplicate (bypasses hash check for all files)', false)
    .option('--dry-run', 'Show what would be ingested without submitting jobs (validates files, checks duplicates, displays skip/submit counts, cancels test jobs)', false)
    .option('--no-approve', 'Require manual approval before processing (default: auto-approve)')
    .option('--parallel', 'Process in parallel (faster but may create duplicate concepts)', false)
    .option('--target-words <n>', 'Target words per chunk', '1000')
    .option('--overlap-words <n>', 'Overlap between chunks', '200')
    .showHelpAfterError()
    .action(async (dir, options) => {
    try {
        // Validate directory exists
        if (!fs.existsSync(dir)) {
            console.error(chalk_1.default.red(`✗ Directory not found: ${dir}`));
            process.exit(1);
        }
        if (!fs.statSync(dir).isDirectory()) {
            console.error(chalk_1.default.red(`✗ Not a directory: ${dir}`));
            process.exit(1);
        }
        // Validate options: either --ontology or --directories-as-ontologies required
        if (!options.ontology && !options.directoriesAsOntologies) {
            console.error(chalk_1.default.red('✗ Either --ontology or --directories-as-ontologies is required'));
            console.error(chalk_1.default.gray('  Use --ontology to specify a single ontology'));
            console.error(chalk_1.default.gray('  Use --directories-as-ontologies to auto-create ontologies from directory structure'));
            process.exit(1);
        }
        if (options.ontology && options.directoriesAsOntologies) {
            console.error(chalk_1.default.red('✗ Cannot use both --ontology and --directories-as-ontologies'));
            console.error(chalk_1.default.gray('  Choose one: specify ontology or use directory names'));
            process.exit(1);
        }
        const client = (0, client_1.createClientFromEnv)();
        // Determine depth
        const maxDepth = options.depth === 'all' ? Infinity : parseInt(options.depth);
        const recurse = options.recurse || maxDepth > 0;
        // Warn if user specified --recurse but didn't set --depth (will only scan current dir)
        if (options.recurse && options.depth === '0') {
            console.log(chalk_1.default.yellow('\n⚠️ Warning: --recurse specified but --depth is 0 (default)'));
            console.log(chalk_1.default.gray('  This will only scan the current directory, not subdirectories.'));
            console.log(chalk_1.default.gray('  Did you mean: --recurse --depth 1 (or --depth 2, etc.)?'));
            console.log(chalk_1.default.gray('  Continuing with current directory only...\n'));
        }
        // Collect matching files (with directory info if using directories as ontologies)
        const filesWithDirs = options.directoriesAsOntologies
            ? collectFilesWithDirectories(dir, options.pattern, recurse, maxDepth)
            : collectFiles(dir, options.pattern, recurse, maxDepth).map(f => ({ file: f, ontologyDir: dir }));
        if (filesWithDirs.length === 0) {
            console.log(chalk_1.default.yellow(`\n⊘ No files found matching patterns: ${options.pattern.join(', ')}`));
            return;
        }
        // Group files by ontology if using directory names
        const filesByOntology = new Map();
        for (const { file, ontologyDir } of filesWithDirs) {
            const ontologyName = options.directoriesAsOntologies
                ? nodePath.basename(ontologyDir)
                : options.ontology;
            if (!filesByOntology.has(ontologyName)) {
                filesByOntology.set(ontologyName, []);
            }
            filesByOntology.get(ontologyName).push(file);
        }
        // Categorize files by type
        const imageFiles = filesWithDirs.filter(({ file }) => isImageFile(file));
        const textFiles = filesWithDirs.filter(({ file }) => !isImageFile(file));
        console.log(chalk_1.default.blue(`\n📁 Found ${filesWithDirs.length} file(s):`));
        console.log(chalk_1.default.gray(`  Text files: ${textFiles.length}`));
        console.log(chalk_1.default.gray(`  Image files: ${imageFiles.length}`));
        if (options.directoriesAsOntologies) {
            console.log(chalk_1.default.gray(`  Ontologies: ${filesByOntology.size}`));
            for (const [ontology, files] of filesByOntology) {
                const ontImages = files.filter(f => isImageFile(f)).length;
                const ontText = files.length - ontImages;
                console.log(chalk_1.default.gray(`    • ${ontology}: ${files.length} file(s) (${ontText} text, ${ontImages} images)`));
            }
        }
        else {
            if (textFiles.length > 0) {
                console.log(chalk_1.default.gray(`\n  Text files:`));
                textFiles.forEach(({ file }) => console.log(chalk_1.default.gray(`    • ${nodePath.relative(dir, file)}`)));
            }
            if (imageFiles.length > 0) {
                console.log(chalk_1.default.gray(`\n  Images:`));
                imageFiles.forEach(({ file }) => console.log(chalk_1.default.gray(`    • ${nodePath.relative(dir, file)}`)));
            }
        }
        // Dry-run mode: check duplicates without submitting
        if (options.dryRun) {
            console.log(chalk_1.default.blue(`\n🔍 Dry-run mode: Checking for duplicates...\n`));
            let wouldSubmit = 0;
            let wouldSkip = 0;
            const skipDetails = [];
            const submitDetails = [];
            for (const { file: filePath, ontologyDir } of filesWithDirs) {
                const ontologyName = options.directoriesAsOntologies
                    ? nodePath.basename(ontologyDir)
                    : options.ontology;
                const request = {
                    ontology: ontologyName,
                    filename: nodePath.basename(filePath),
                    force: options.force,
                    auto_approve: false, // Doesn't matter for dry-run, but set conservatively
                    processing_mode: 'serial',
                    // ADR-051: Source provenance metadata
                    source_type: 'file',
                    source_path: nodePath.resolve(filePath),
                    source_hostname: os.hostname(),
                };
                try {
                    // Route to appropriate API based on file type
                    let result;
                    if (isImageFile(filePath)) {
                        // Image dry-run check
                        const imageRequest = {
                            ontology: request.ontology,
                            filename: request.filename,
                            force: request.force,
                            auto_approve: false,
                            vision_provider: 'openai',
                            source_type: 'file',
                            source_path: request.source_path,
                            source_hostname: request.source_hostname,
                        };
                        result = await client.ingestImage(filePath, imageRequest);
                    }
                    else {
                        // Text dry-run check
                        result = await client.ingestFile(filePath, request);
                    }
                    const displayPath = options.directoriesAsOntologies
                        ? `[${ontologyName}] ${nodePath.basename(filePath)}`
                        : nodePath.relative(dir, filePath);
                    const fileType = isImageFile(filePath) ? '🖼️ ' : '📄 ';
                    if ('duplicate' in result && result.duplicate) {
                        wouldSkip++;
                        skipDetails.push(`  ${chalk_1.default.yellow('⊘')} ${chalk_1.default.gray(fileType + displayPath)}`);
                    }
                    else {
                        // It created a pending job - we need to cancel it
                        const submitResult = result;
                        await client.cancelJob(submitResult.job_id);
                        wouldSubmit++;
                        submitDetails.push(`  ${chalk_1.default.green('✓')} ${fileType + displayPath}`);
                    }
                }
                catch (error) {
                    const fileType = isImageFile(filePath) ? '🖼️ ' : '📄 ';
                    wouldSkip++;
                    skipDetails.push(`  ${chalk_1.default.red('✗')} ${chalk_1.default.gray(fileType + nodePath.relative(dir, filePath))} ${chalk_1.default.dim(`(${error.message})`)}`);
                }
            }
            console.log(chalk_1.default.blue(`\n📊 Dry-run Summary:`));
            console.log(chalk_1.default.gray(`  Total files: ${filesWithDirs.length}`));
            console.log(chalk_1.default.green(`  Would submit: ${wouldSubmit}`));
            console.log(chalk_1.default.yellow(`  Would skip (duplicates): ${wouldSkip}`));
            if (submitDetails.length > 0) {
                console.log(chalk_1.default.green(`\n✓ Files that would be ingested:`));
                submitDetails.forEach(line => console.log(line));
            }
            if (skipDetails.length > 0) {
                console.log(chalk_1.default.yellow(`\n⊘ Files that would be skipped:`));
                skipDetails.forEach(line => console.log(line));
            }
            console.log(chalk_1.default.blue(`\n💡 To proceed with ingestion, run without --dry-run flag\n`));
            return;
        }
        // Normal mode: actually submit jobs
        // Default to auto-approve
        const autoApprove = options.approve !== false;
        console.log(chalk_1.default.blue(`\nSubmitting ${filesWithDirs.length} ingestion jobs...`));
        if (!options.directoriesAsOntologies) {
            console.log(chalk_1.default.gray(`  Ontology: ${options.ontology}`));
        }
        console.log(chalk_1.default.gray(`  Auto-approve: ${autoApprove ? 'yes' : 'no'}\n`));
        const jobIds = [];
        let submitted = 0;
        let skipped = 0;
        for (const { file: filePath, ontologyDir } of filesWithDirs) {
            const ontologyName = options.directoriesAsOntologies
                ? nodePath.basename(ontologyDir)
                : options.ontology;
            const request = {
                ontology: ontologyName,
                filename: nodePath.basename(filePath),
                force: options.force,
                auto_approve: autoApprove,
                processing_mode: options.parallel ? 'parallel' : 'serial',
                options: {
                    target_words: parseInt(options.targetWords),
                    overlap_words: parseInt(options.overlapWords),
                },
                // ADR-051: Source provenance metadata
                source_type: 'file',
                source_path: nodePath.resolve(filePath),
                source_hostname: os.hostname(),
            };
            try {
                // Route to appropriate API based on file type
                let result;
                if (isImageFile(filePath)) {
                    // Image ingestion (ADR-057)
                    const imageRequest = {
                        ontology: request.ontology,
                        filename: request.filename,
                        force: request.force,
                        auto_approve: request.auto_approve,
                        vision_provider: 'openai', // Default to OpenAI for directory ingestion
                        source_type: 'file',
                        source_path: request.source_path,
                        source_hostname: request.source_hostname,
                    };
                    result = await client.ingestImage(filePath, imageRequest);
                }
                else {
                    // Text ingestion
                    result = await client.ingestFile(filePath, request);
                }
                if ('duplicate' in result && result.duplicate) {
                    const displayPath = options.directoriesAsOntologies
                        ? `[${ontologyName}] ${nodePath.basename(filePath)}`
                        : nodePath.relative(dir, filePath);
                    const fileType = isImageFile(filePath) ? '🖼️ ' : '📄 ';
                    console.log(chalk_1.default.yellow(`⊘ Skipped (duplicate): ${fileType}${displayPath}`));
                    skipped++;
                }
                else {
                    const submitResult = result;
                    jobIds.push(submitResult.job_id);
                    const displayPath = options.directoriesAsOntologies
                        ? `[${ontologyName}] ${nodePath.basename(filePath)}`
                        : nodePath.relative(dir, filePath);
                    const fileType = isImageFile(filePath) ? '🖼️ ' : '📄 ';
                    console.log(chalk_1.default.green(`✓ Queued: ${fileType}${displayPath} → ${submitResult.job_id.substring(0, 12)}...`));
                    submitted++;
                }
            }
            catch (error) {
                const displayPath = options.directoriesAsOntologies
                    ? `[${ontologyName}] ${nodePath.basename(filePath)}`
                    : nodePath.relative(dir, filePath);
                const fileType = isImageFile(filePath) ? '🖼️ ' : '📄 ';
                console.log(chalk_1.default.red(`✗ Failed: ${fileType}${displayPath} - ${error.message}`));
                skipped++;
            }
        }
        console.log(chalk_1.default.blue(`\n📊 Summary:`));
        console.log(chalk_1.default.gray(`  Submitted: ${submitted}`));
        console.log(chalk_1.default.gray(`  Skipped: ${skipped}`));
        if (jobIds.length > 0) {
            console.log(chalk_1.default.blue('\n📋 Next steps:'));
            if (autoApprove) {
                console.log(chalk_1.default.gray(`  Monitor all: ${chalk_1.default.cyan(`kg jobs list`)}`));
                console.log(chalk_1.default.gray(`  View details: ${chalk_1.default.cyan(`kg jobs status <job-id>`)}`));
            }
            else {
                console.log(chalk_1.default.gray(`  Approve all pending: ${chalk_1.default.cyan(`kg jobs approve pending`)}`));
                console.log(chalk_1.default.gray(`  View pending: ${chalk_1.default.cyan(`kg jobs list pending`)}`));
            }
        }
    }
    catch (error) {
        console.error(chalk_1.default.red('\n✗ Directory ingestion failed'));
        console.error(chalk_1.default.red(error.response?.data?.detail || error.message));
        process.exit(1);
    }
});
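// ---------------------------------------------------------------------------
// Editorial note (illustrative sketch, not part of the published package):
// in --directories-as-ontologies mode the ontology name is simply the
// basename of a file's immediate parent directory, so a tree like
//
//   notes/physics/waves.md     → ontology "physics"
//   notes/chemistry/acids.md   → ontology "chemistry"
//
// reduces to a one-liner per file:
//
//   const nodePath = require('path');
//   const ontologyFor = (file) => nodePath.basename(nodePath.dirname(file));
//   ontologyFor('notes/physics/waves.md'); // "physics"
//
// This matches the "physics/" → "physics" example in the help text above.
// ---------------------------------------------------------------------------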
// Ingest text command
exports.ingestCommand
    .command('text <text>')
    .description('Ingest raw text directly without a file. Submits text content as ingestion job, useful for quick testing/prototyping, ingesting programmatically generated text, API/script integration, and processing text from other commands. Can pipe command output via xargs or use multiline text with heredoc syntax. Text is chunked (default 1000 words per chunk) and processed like file ingestion. Use --filename to customize displayed name in ontology files list (default: text_input). Behavior same as file ingestion: auto-approves by default, detects duplicates, supports --wait for synchronous completion.')
    .requiredOption('-o, --ontology <name>', 'Ontology/collection name (named collection or knowledge domain)')
    .option('-f, --force', 'Force re-ingestion even if duplicate (bypasses content hash check)', false)
    .option('--no-approve', 'Require manual approval before processing (default: auto-approve)')
    .option('--parallel', 'Process in parallel (faster but may create duplicate concepts)', false)
    .option('--filename <name>', 'Filename for tracking (displayed in ontology files list, temporary path context)', 'text_input')
    .option('--target-words <n>', 'Target words per chunk', '1000')
    .option('-w, --wait', 'Wait for job completion (polls until complete, shows progress). Default: submit and exit.', false)
    .showHelpAfterError()
    .action(async (text, options) => {
    try {
        const client = (0, client_1.createClientFromEnv)();
        const config = (0, config_1.getConfig)();
        // Default to auto-approve (options.approve is true by default due to --no-approve pattern)
        // Only require approval if user explicitly passes --no-approve flag
        const autoApprove = options.approve !== false;
        const request = {
            ontology: options.ontology,
            filename: options.filename,
            force: options.force,
            auto_approve: autoApprove, // ADR-014: Auto-approve flag
            processing_mode: options.parallel ? 'parallel' : 'serial',
            options: {
                target_words: parseInt(options.targetWords),
            },
            // ADR-051: Source provenance metadata (stdin/direct text input)
            source_type: 'stdin',
        };
        console.log(chalk_1.default.blue('Submitting text for ingestion...'));
        console.log(chalk_1.default.gray(`  Text length: ${text.length} chars`));
        console.log(chalk_1.default.gray(`  Ontology: ${request.ontology}`));
        const result = await client.ingestText(text, request);
        // Check if duplicate
        if ('duplicate' in result && result.duplicate) {
            const dupResult = result;
            console.log(chalk_1.default.yellow('\n⊘ Duplicate detected'));
            console.log(chalk_1.default.gray(`  ${dupResult.message}`));
            return;
        }
        // Type narrowed: result is JobSubmitResponse
        const submitResult = result;
        console.log(chalk_1.default.green(`\n✓ Job submitted: ${submitResult.job_id}`));
        // If --wait flag provided, poll for completion
        if (options.wait) {
            await pollJobWithProgress(client, submitResult.job_id);
        }
        else {
            // Default: submit and exit (like walking away from the counter)
            console.log(chalk_1.default.gray(`\nMonitor progress: ${chalk_1.default.cyan(`kg jobs status ${submitResult.job_id} --watch`)}`));
        }
    }
    catch (error) {
        console.error(chalk_1.default.red('\n✗ Ingestion failed'));
        console.error(chalk_1.default.red(error.response?.data?.detail || error.message));
        process.exit(1);
    }
});
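// ---------------------------------------------------------------------------
// Editorial note (illustrative sketch, not part of the published package):
// the same client calls this subcommand uses can be driven from a script,
// assuming the dist entry points are requireable from your setup and that
// createClientFromEnv() picks up the usual environment configuration:
//
//   const { createClientFromEnv } = require('@aaronsb/kg-cli/dist/api/client');
//   const client = createClientFromEnv();
//   const result = await client.ingestText('Water boils at 100C at sea level.', {
//       ontology: 'demo',           // hypothetical ontology name
//       filename: 'text_input',
//       auto_approve: true,
//       processing_mode: 'serial',
//       source_type: 'stdin',
//   });
//   // result is either a duplicate notice or a job submission ({ job_id, ... })
// ---------------------------------------------------------------------------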
/**
 * Check if a file is an image based on extension
 */
function isImageFile(filepath) {
    const ext = nodePath.extname(filepath).toLowerCase();
    const imageExtensions = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'];
    return imageExtensions.includes(ext);
}
/**
 * Collect files matching patterns from directory
 */
function collectFiles(dir, patterns, recurse, maxDepth, currentDepth = 0) {
    const files = [];
    // Don't recurse beyond max depth
    if (currentDepth > maxDepth) {
        return files;
    }
    const entries = fs.readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
        const fullPath = nodePath.join(dir, entry.name);
        if (entry.isDirectory() && recurse && currentDepth < maxDepth) {
            // Recurse into subdirectory
            files.push(...collectFiles(fullPath, patterns, recurse, maxDepth, currentDepth + 1));
        }
        else if (entry.isFile()) {
            // Check if file matches any pattern
            const matches = patterns.some(pattern => {
                // Convert glob pattern to regex
                const regexPattern = pattern
                    .replace(/\./g, '\\.')
                    .replace(/\*/g, '.*')
                    .replace(/\?/g, '.');
                const regex = new RegExp(`^${regexPattern}$`, 'i');
                return regex.test(entry.name);
            });
            if (matches) {
                files.push(fullPath);
            }
        }
    }
    return files;
}
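// ---------------------------------------------------------------------------
// Editorial note (illustrative sketch, not part of the published package):
// the inline glob support above handles only `*`, `?`, and literal dots.
// A pattern like '*.md' becomes the case-insensitive, anchored regex
// /^.*\.md$/i:
//
//   const toRegex = (pattern) => new RegExp(
//       '^' + pattern.replace(/\./g, '\\.').replace(/\*/g, '.*').replace(/\?/g, '.') + '$',
//       'i');
//   toRegex('*.md').test('README.md'); // true
//   toRegex('*.md').test('readme.MD'); // true (the 'i' flag)
//   toRegex('*.md').test('notes.mdx'); // false (anchored with ^...$)
//
// Brace expansion, character classes, and '**' are not supported here.
// ---------------------------------------------------------------------------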
/**
 * Collect files with their parent directory info for ontology mapping
 */
function collectFilesWithDirectories(baseDir, patterns, recurse, maxDepth, currentDepth = 0) {
    const results = [];
    // Don't recurse beyond max depth
    if (currentDepth > maxDepth) {
        return results;
    }
    const entries = fs.readdirSync(baseDir, { withFileTypes: true });
    for (const entry of entries) {
        const fullPath = nodePath.join(baseDir, entry.name);
        if (entry.isDirectory() && recurse && currentDepth < maxDepth) {
            // Recurse into subdirectory - subdirectory becomes the ontology
            const subResults = collectFilesWithDirectories(fullPath, patterns, recurse, maxDepth, currentDepth + 1);
            results.push(...subResults);
        }
        else if (entry.isFile()) {
            // Check if file matches any pattern
            const matches = patterns.some(pattern => {
                // Convert glob pattern to regex
                const regexPattern = pattern
                    .replace(/\./g, '\\.')
                    .replace(/\*/g, '.*')
                    .replace(/\?/g, '.');
                const regex = new RegExp(`^${regexPattern}$`, 'i');
                return regex.test(entry.name);
            });
            if (matches) {
                // Use immediate parent directory as ontology
                results.push({
                    file: fullPath,
                    ontologyDir: baseDir
                });
            }
        }
    }
    return results;
}
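// ---------------------------------------------------------------------------
// Editorial note (illustrative sketch, not part of the published package):
// both collectors bound recursion the same way: a directory is entered only
// while currentDepth < maxDepth, so --depth 0 scans the top level only and
// --depth 1 descends one level. For a tree
//
//   docs/a.md
//   docs/guides/b.md
//   docs/guides/deep/c.md
//
// collectFiles('docs', ['*.md'], true, 1) returns a.md and b.md but not c.md,
// because entering 'guides/deep' would require currentDepth 1 < maxDepth 1.
// ---------------------------------------------------------------------------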
/**
 * Poll job with progress spinner
 */
async function pollJobWithProgress(client, jobId) {
    const spinner = (0, ora_1.default)('Queued...').start();
    try {
        const finalJob = await client.pollJob(jobId, (job) => {
            if (job.status === 'processing' && job.progress) {
                const p = job.progress;
                if (p.percent !== undefined) {
                    const conceptsTotal = (p.concepts_created || 0) + (p.concepts_linked || 0);
                    const hitRate = conceptsTotal > 0 ? Math.round((p.concepts_linked || 0) / conceptsTotal * 100) : 0;
                    spinner.text = `Processing... ${p.percent}% (${p.chunks_processed}/${p.chunks_total} chunks) | Concepts: ${conceptsTotal} (${hitRate}% reused) | Relationships: ${p.relationships_created || 0}`;
                }
                else {
                    spinner.text = `Processing... ${p.stage}`;
                }
            }
            else if (job.status === 'queued') {
                spinner.text = 'Queued...';
            }
        });
        if (finalJob.status === 'completed') {
            spinner.succeed('Ingestion completed!');
            printJobResult(finalJob.result);
        }
        else if (finalJob.status === 'failed') {
            spinner.fail(`Ingestion failed: ${finalJob.error}`);
            process.exit(1);
        }
        else if (finalJob.status === 'cancelled') {
            spinner.warn('Ingestion cancelled');
        }
        else if (finalJob.status === 'awaiting_approval') {
            spinner.info('Job awaiting approval');
            console.log(chalk_1.default.blue('\n📋 Job requires approval before processing'));
            console.log(chalk_1.default.gray(`  Job ID: ${jobId}`));
            console.log(chalk_1.default.gray(`\n  To approve: ${chalk_1.default.cyan(`kg jobs approve ${jobId}`)}`));
            console.log(chalk_1.default.gray(`  To cancel: ${chalk_1.default.cyan(`kg jobs cancel ${jobId}`)}`));
            console.log(chalk_1.default.gray(`  To monitor: ${chalk_1.default.cyan(`kg jobs status ${jobId} --watch`)}`));
        }
    }
    catch (error) {
        spinner.fail('Polling failed');
        throw error;
    }
}
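// ---------------------------------------------------------------------------
// Editorial note (illustrative sketch, not part of the published package):
// the "% reused" figure in the spinner is the share of concepts that matched
// existing graph nodes rather than being newly created:
//
//   hitRate = round(concepts_linked / (concepts_created + concepts_linked) * 100)
//
// e.g. a progress payload of { concepts_created: 10, concepts_linked: 30 }
// reports 40 concepts at 75% reused. A rising hit rate during serial
// ingestion reflects the deduplication benefit that the --parallel help text
// warns may be lost when chunks are processed simultaneously.
// ---------------------------------------------------------------------------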
/**
 * Print job result summary
 */
function printJobResult(result) {
    if (!result)
        return;
    console.log(chalk_1.default.blue('\nResults:'));
    if (result.stats) {
        console.log(chalk_1.default.gray(`  Chunks processed: ${result.stats.chunks_processed}`));
        console.log(chalk_1.default.gray(`  Concepts created: ${result.stats.concepts_created}`));
        console.log(chalk_1.default.gray(`  Sources created: ${result.stats.sources_created}`));
        console.log(chalk_1.default.gray(`  Relationships: ${result.stats.relationships_created}`));
    }
    if (result.cost) {
        console.log(chalk_1.default.blue('\nCost:'));
        console.log(chalk_1.default.gray(`  Extraction: ${result.cost.extraction}`));
        console.log(chalk_1.default.gray(`  Embeddings: ${result.cost.embeddings}`));
        console.log(chalk_1.default.gray(`  Total: ${result.cost.total}`));
    }
}
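// ---------------------------------------------------------------------------
// Editorial note (illustrative sketch, not part of the published package):
// printJobResult tolerates partial results; only the fields it reads need to
// be present. A hypothetical completed-job result compatible with it (all
// values invented for illustration):
//
//   printJobResult({
//       stats: { chunks_processed: 4, concepts_created: 12,
//                sources_created: 4, relationships_created: 18 },
//       cost: { extraction: '$0.021', embeddings: '$0.003', total: '$0.024' },
//   });
// ---------------------------------------------------------------------------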
// Ingest image command (ADR-057)
exports.ingestCommand
    .command('image <path>')
    .description('Ingest an image file using multimodal vision AI (ADR-057). Converts image to prose description using GPT-4o Vision, generates visual embeddings with Nomic Vision v1.5, then extracts concepts via standard pipeline. Supports PNG, JPEG, GIF, WebP, BMP (max 10MB). Research validated: GPT-4o 100% reliable, Nomic Vision 0.847 clustering quality (27% better than CLIP). See docs/research/vision-testing/')
    .requiredOption('-o, --ontology <name>', 'Ontology/collection name')
    .option('-f, --force', 'Force re-ingestion even if duplicate', false)
    .option('--no-approve', 'Require manual approval before processing. Default: auto-approve.')
    .option('--vision-provider <provider>', 'Vision provider: openai (default), anthropic, ollama', 'openai')
    .option('--vision-model <model>', 'Vision model name (optional, uses provider default)')
    .option('--filename <name>', 'Override filename for tracking')
    .option('-w, --wait', 'Wait for job completion', false)
    .showHelpAfterError()
    .action(async (path, options) => {
    try {
        // Validate file exists
        if (!fs.existsSync(path)) {
            console.error(chalk_1.default.red(`✗ Image file not found: ${path}`));
            process.exit(1);
        }
        // Validate file is an image
        const ext = nodePath.extname(path).toLowerCase();
        const supportedFormats = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'];
        if (!supportedFormats.includes(ext)) {
            console.error(chalk_1.default.red(`✗ Unsupported image format: ${ext}`));
            console.error(chalk_1.default.gray(`  Supported: ${supportedFormats.join(', ')}`));
            process.exit(1);
        }
        const client = (0, client_1.createClientFromEnv)();
        const config = (0, config_1.getConfig)();
        const autoApprove = options.approve !== false;
        const spinner = (0, ora_1.default)('Submitting image for ingestion...').start();
        try {
            // Read file
            const fileBuffer = fs.readFileSync(path);
            const stats = fs.statSync(path);
            // Check file size (10MB limit)
            const maxSize = 10 * 1024 * 1024;
            if (stats.size > maxSize) {
                spinner.fail('Image too large');
                console.error(chalk_1.default.red(`✗ Image size ${(stats.size / 1024 / 1024).toFixed(2)}MB exceeds 10MB limit`));
                process.exit(1);
            }
            spinner.text = `Uploading image (${(stats.size / 1024).toFixed(1)}KB)...`;
            // Prepare request
            const request = {
                ontology: options.ontology,
                filename: options.filename,
                force: options.force,
                auto_approve: autoApprove,
                vision_provider: options.visionProvider,
                vision_model: options.visionModel,
                // ADR-051: Source metadata
                source_type: 'file',
                source_path: nodePath.resolve(path),
                source_hostname: os.hostname(),
            };
            // Submit to API using ingestImage method
            const result = await client.ingestImage(path, request);
            // Check if duplicate
            if ('duplicate' in result && result.duplicate) {
                spinner.warn('Duplicate detected');
                console.log(chalk_1.default.yellow('\n⚠️ This image has already been ingested'));
                console.log(chalk_1.default.gray(`  Previous job: ${result.existing_job_id}`));
                console.log(chalk_1.default.gray(`  Status: ${result.status}`));
                console.log(chalk_1.default.gray(`\n  ${result.message}`));
                if (result.use_force) {
                    console.log(chalk_1.default.gray(`  ${result.use_force}`));
                }
                return;
            }
            // Type narrowed: result is JobSubmitResponse
            const submitResult = result;
            const jobId = submitResult.job_id;
            spinner.succeed(`Image submitted (Job ${jobId})`);
            console.log(chalk_1.default.blue('\n📸 Image Ingestion Job Created'));
            console.log(chalk_1.default.gray(`  Job ID: ${jobId}`));
            console.log(chalk_1.default.gray(`  Status: ${submitResult.status}`));
            console.log(chalk_1.default.gray(`  Ontology: ${options.ontology}`));
            console.log(chalk_1.default.gray(`  File: ${nodePath.basename(path)} (${(stats.size / 1024).toFixed(1)}KB)`));
            console.log(chalk_1.default.gray(`  Vision: ${options.visionProvider || 'openai'} (GPT-4o)`));
            console.log(chalk_1.default.gray(`\n  Monitor: ${chalk_1.default.cyan(`kg jobs status ${jobId} --watch`)}`));
            // Wait for completion if requested
            if (options.wait) {
                await pollJobWithProgress(client, jobId);
            }
        }
        catch (error) {
            spinner.fail('Image ingestion failed');
            if (error.response) {
                const status = error.response.status;
                const detail = error.response.data?.detail || error.response.statusText;
                console.error(chalk_1.default.red(`\n✗ API Error (${status}): ${detail}`));
                if (status === 401 || status === 403) {
                    console.error(chalk_1.default.gray('\n  Authentication required. Please login:'));
                    console.error(chalk_1.default.cyan('  kg login'));
                }
            }
            else {
                console.error(chalk_1.default.red(`\n✗ ${error.message}`));
            }
            process.exit(1);
        }
    }
    catch (error) {
        console.error(chalk_1.default.red(`\n✗ Error: ${error.message}`));
        process.exit(1);
    }
});
//# sourceMappingURL=ingest.js.map
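For orientation, the four subcommands registered above map to invocations like the following. Flag names are taken from the option definitions in the file; the ontology name "demo" and the file paths are placeholders, not values from the package:

  kg ingest file notes.md -o demo -w
  kg ingest directory ./notes --directories-as-ontologies --recurse --depth 1 --dry-run
  kg ingest text "Water boils at 100C at sea level." -o demo
  kg ingest image diagram.png -o demo --vision-provider openai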