@byted-las/contextlake-openclaw 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. package/dist/index.d.ts +2 -1
  2. package/dist/index.js +5 -5
  3. package/dist/src/client/lancedb.js +13 -4
  4. package/dist/src/commands/cli.d.ts +3 -1
  5. package/dist/src/commands/cli.js +66 -5
  6. package/dist/src/commands/index.d.ts +2 -1
  7. package/dist/src/commands/index.js +6 -29
  8. package/dist/src/commands/slashcmd.d.ts +2 -1
  9. package/dist/src/commands/tools.d.ts +8 -218
  10. package/dist/src/commands/tools.js +71 -73
  11. package/dist/src/lib/actions/ingest.d.ts +2 -1
  12. package/dist/src/lib/actions/ingest.js +4 -9
  13. package/dist/src/lib/actions/manage.d.ts +3 -2
  14. package/dist/src/{skills/las-data-profiler/index.d.ts → lib/actions/profiler.d.ts} +1 -2
  15. package/dist/src/{skills/las-data-profiler/index.js → lib/actions/profiler.js} +2 -2
  16. package/dist/src/lib/actions/retrieve.d.ts +2 -1
  17. package/dist/src/lib/actions/retrieve.js +0 -10
  18. package/dist/src/processor/loader.js +9 -2
  19. package/dist/src/service/embedding/factory.js +1 -1
  20. package/dist/src/service/embedding/interface.d.ts +3 -1
  21. package/dist/src/service/embedding/local.js +16 -13
  22. package/dist/src/service/embedding/remote.d.ts +6 -0
  23. package/dist/src/service/embedding/remote.js +77 -7
  24. package/dist/src/skills/SKILL.md +174 -0
  25. package/dist/src/skills/contextlake-delete/SKILL.md +36 -0
  26. package/dist/src/skills/contextlake-ingest/SKILL.md +40 -0
  27. package/dist/src/skills/contextlake-list/SKILL.md +22 -0
  28. package/dist/src/skills/contextlake-retrieve/SKILL.md +37 -0
  29. package/dist/src/skills/las-data-profiler/SKILL.md +174 -0
  30. package/dist/src/utils/config.d.ts +34 -1
  31. package/dist/src/utils/config.js +6 -2
  32. package/index.ts +8 -8
  33. package/package.json +8 -3
  34. package/src/client/lancedb.ts +32 -21
  35. package/src/commands/cli.ts +73 -7
  36. package/src/commands/index.ts +15 -36
  37. package/src/commands/slashcmd.ts +2 -3
  38. package/src/commands/tools.ts +102 -85
  39. package/src/lib/actions/ingest.ts +12 -16
  40. package/src/lib/actions/manage.ts +6 -5
  41. package/src/{skills/las-data-profiler/index.ts → lib/actions/profiler.ts} +3 -3
  42. package/src/lib/actions/retrieve.ts +14 -24
  43. package/src/lib/scripts/s3_catalog.py +608 -0
  44. package/src/processor/loader.ts +12 -4
  45. package/src/service/embedding/factory.ts +1 -1
  46. package/src/service/embedding/interface.ts +3 -1
  47. package/src/service/embedding/local.ts +32 -29
  48. package/src/service/embedding/remote.ts +101 -17
  49. package/src/service/storage/factory.ts +2 -2
  50. package/src/utils/config.ts +49 -7
  51. package/dist/src/skills/las-data-profiler/register.d.ts +0 -1
  52. package/dist/src/skills/las-data-profiler/register.js +0 -19
  53. package/src/skills/las-data-profiler/register.ts +0 -19
  54. /package/{src/skills/las-data-profiler → dist/src/lib/scripts}/s3_catalog.py +0 -0
package/dist/index.d.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import type { OpenClawPluginApi } from 'openclaw/plugin-sdk';
1
2
  declare const plugin: {
2
3
  id: string;
3
4
  name: string;
@@ -108,6 +109,6 @@ declare const plugin: {
108
109
  };
109
110
  };
110
111
  };
111
- register(ctx: any): void;
112
+ register(ctx: OpenClawPluginApi): void;
112
113
  };
113
114
  export default plugin;
package/dist/index.js CHANGED
@@ -4,7 +4,7 @@ const commands_1 = require("./src/commands");
4
4
  const plugin = {
5
5
  id: 'contextlake-openclaw',
6
6
  name: 'ContextLake',
7
- version: '1.1.0',
7
+ version: '1.0.2',
8
8
  description: 'A lightweight knowledge base plugin for OpenClaw using LanceDB and TOS, with data profiling support',
9
9
  configSchema: {
10
10
  type: 'object',
@@ -58,10 +58,10 @@ const plugin = {
58
58
  },
59
59
  register(ctx) {
60
60
  const logger = ctx.logger || {
61
- info: (msg, ...args) => console.log(msg, ...args),
62
- warn: (msg, ...args) => console.warn(msg, ...args),
63
- error: (msg, ...args) => console.error(msg, ...args),
64
- debug: (msg, ...args) => console.debug(msg, ...args),
61
+ info: (msg) => console.log(msg),
62
+ warn: (msg) => console.warn(msg),
63
+ error: (msg) => console.error(msg),
64
+ debug: (msg) => console.debug(msg),
65
65
  };
66
66
  // Add logging
67
67
  logger.info(`[${new Date().toISOString()}] [ContextLake] Plugin register started`);
package/dist/src/client/lancedb.js CHANGED
@@ -60,7 +60,7 @@ class ContextLakeLanceDBClient {
60
60
  else {
61
61
  if (dim <= 0) {
62
62
  // Fallback: use embedding provider to infer dimension only if needed
63
- const dummyVec = await this.embeddingProvider.generateEmbedding("init");
63
+ const dummyVec = await this.embeddingProvider.generateEmbedding('init');
64
64
  dim = dummyVec.length;
65
65
  }
66
66
  // @ts-ignore
@@ -88,10 +88,18 @@ class ContextLakeLanceDBClient {
88
88
  await table.add(docs);
89
89
  }
90
90
  async search(query, limit = 5, filter) {
91
- const vector = await this.embeddingProvider.generateEmbedding(query);
91
+ const normalizedLimit = Number.isFinite(limit) ? Math.max(1, Math.floor(limit)) : 5;
92
92
  const table = await this.getTable();
93
+ if (!query || !query.trim()) {
94
+ let fallbackQuery = table.query().limit(normalizedLimit);
95
+ if (filter) {
96
+ fallbackQuery = fallbackQuery.where(filter);
97
+ }
98
+ return await fallbackQuery.toArray();
99
+ }
100
+ const vector = await this.embeddingProvider.generateEmbedding(query);
93
101
  // @ts-ignore
94
- let search = table.vectorSearch(vector).limit(limit);
102
+ let search = table.vectorSearch(vector).limit(normalizedLimit);
95
103
  if (filter) {
96
104
  search = search.where(filter);
97
105
  }
@@ -102,8 +110,9 @@ class ContextLakeLanceDBClient {
102
110
  await table.delete(filter);
103
111
  }
104
112
  async list(limit = 100, filter) {
113
+ const normalizedLimit = Number.isFinite(limit) ? Math.max(1, Math.floor(limit)) : 100;
105
114
  const table = await this.getTable();
106
- let query = table.query().limit(limit);
115
+ let query = table.query().limit(normalizedLimit);
107
116
  if (filter) {
108
117
  query = query.where(filter);
109
118
  }
package/dist/src/commands/cli.d.ts CHANGED
@@ -1,4 +1,6 @@
1
- export declare function getCliCommands(pluginConfig: any, logger: any): {
1
+ import { ContextLakeConfig } from '../utils/config';
2
+ export declare function getCliCommands(pluginConfig: ContextLakeConfig, logger: any): {
3
+ connectAction: (datasource_name: string, options: any) => Promise<void>;
2
4
  ingestAction: (files: any, options: any) => Promise<void>;
3
5
  searchAction: (query: any, options: any) => Promise<void>;
4
6
  listAction: (options: any) => Promise<void>;
package/dist/src/commands/cli.js CHANGED
@@ -1,20 +1,81 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.getCliCommands = getCliCommands;
4
+ // @ts-ignore
4
5
  const ingest_1 = require("../lib/actions/ingest");
5
6
  const retrieve_1 = require("../lib/actions/retrieve");
6
7
  const manage_1 = require("../lib/actions/manage");
8
+ const profiler_1 = require("../lib/actions/profiler");
9
+ function parseOptionalInt(value, fallback) {
10
+ const parsed = Number.parseInt(String(value), 10);
11
+ return Number.isFinite(parsed) ? parsed : fallback;
12
+ }
13
+ function parseMetadata(metadata) {
14
+ if (!metadata) {
15
+ return {};
16
+ }
17
+ if (typeof metadata === 'object') {
18
+ return metadata;
19
+ }
20
+ if (typeof metadata !== 'string') {
21
+ throw new Error('metadata must be a JSON object or JSON string');
22
+ }
23
+ try {
24
+ const parsed = JSON.parse(metadata);
25
+ if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) {
26
+ throw new Error('metadata must be a JSON object');
27
+ }
28
+ return parsed;
29
+ }
30
+ catch (error) {
31
+ throw new Error(`Invalid metadata JSON: ${error.message}`);
32
+ }
33
+ }
7
34
  function getCliCommands(pluginConfig, logger) {
8
35
  return {
36
+ connectAction: async (datasource_name, options) => {
37
+ logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect started`, { datasource_name, options });
38
+ try {
39
+ const params = {
40
+ datasource_name,
41
+ vendor: options.vendor,
42
+ endpoint: options.endpoint,
43
+ access_key: options.ak,
44
+ secret_key: options.sk,
45
+ region: options.region,
46
+ bucket: options.bucket,
47
+ prefix: options.prefix,
48
+ sample_rows: parseInt(options.sampleRows),
49
+ };
50
+ // eslint-disable-next-line no-console
51
+ console.log(`[contextlake connect] Connecting to datasource "${datasource_name}"...`);
52
+ // eslint-disable-next-line no-console
53
+ console.log(` vendor: ${params.vendor}`);
54
+ // eslint-disable-next-line no-console
55
+ console.log(` bucket: ${params.bucket}`);
56
+ // eslint-disable-next-line no-console
57
+ console.log(` prefix: ${params.prefix}`);
58
+ const result = await (0, profiler_1.connectDataSource)(params);
59
+ // eslint-disable-next-line no-console
60
+ console.log(JSON.stringify(result, null, 2));
61
+ logger.info(`[${new Date().toISOString()}] [ContextLake] CLI connect success`);
62
+ }
63
+ catch (e) {
64
+ // eslint-disable-next-line no-console
65
+ console.error('Error:', e.message);
66
+ logger.error(`[${new Date().toISOString()}] [ContextLake] CLI connect failed`, { error: e.message, stack: e.stack });
67
+ process.exitCode = 1;
68
+ }
69
+ },
9
70
  ingestAction: async (files, options) => {
10
71
  logger.info(`[${new Date().toISOString()}] [ContextLake] CLI ingest started`, { files, options });
11
72
  try {
12
- const metadata = options.metadata ? JSON.parse(options.metadata) : {};
73
+ const metadata = parseMetadata(options.metadata);
13
74
  const result = await (0, ingest_1.ingestAssets)({
14
75
  files,
15
76
  metadata,
16
- chunkSize: parseInt(options.chunkSize),
17
- overlap: parseInt(options.overlap)
77
+ chunkSize: parseOptionalInt(options.chunkSize, 500),
78
+ overlap: parseOptionalInt(options.overlap, 50)
18
79
  }, pluginConfig, logger);
19
80
  // eslint-disable-next-line no-console
20
81
  console.log(JSON.stringify(result, null, 2));
@@ -30,7 +91,7 @@ function getCliCommands(pluginConfig, logger) {
30
91
  try {
31
92
  const result = await (0, retrieve_1.retrieveAssets)({
32
93
  query,
33
- top_k: parseInt(options.topK),
94
+ top_k: parseOptionalInt(options.topK, 5),
34
95
  filter: options.filter,
35
96
  include_binary: options.binary
36
97
  }, pluginConfig, logger);
@@ -47,7 +108,7 @@ function getCliCommands(pluginConfig, logger) {
47
108
  logger.info(`[${new Date().toISOString()}] [ContextLake] CLI list started`, { options });
48
109
  try {
49
110
  const result = await (0, manage_1.listAssets)({
50
- limit: parseInt(options.limit)
111
+ limit: parseOptionalInt(options.limit, 100)
51
112
  }, pluginConfig, logger);
52
113
  // eslint-disable-next-line no-console
53
114
  console.log(JSON.stringify(result, null, 2));
package/dist/src/commands/index.d.ts CHANGED
@@ -1 +1,2 @@
1
- export declare function registerAll(ctx: any, logger: any): void;
1
+ import type { OpenClawPluginApi, PluginLogger } from 'openclaw/plugin-sdk';
2
+ export declare function registerAll(ctx: OpenClawPluginApi, logger: PluginLogger): void;
package/dist/src/commands/index.js CHANGED
@@ -5,7 +5,6 @@ const tools_1 = require("./tools");
5
5
  const cli_1 = require("./cli");
6
6
  const slashcmd_1 = require("./slashcmd");
7
7
  const config_1 = require("../utils/config");
8
- const las_data_profiler_1 = require("../skills/las-data-profiler");
9
8
  function registerAll(ctx, logger) {
10
9
  const pluginConfig = (0, config_1.getPluginConfig)(ctx);
11
10
  // Register Agent Tools
@@ -19,9 +18,11 @@ function registerAll(ctx, logger) {
19
18
  logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.listTool.name}`);
20
19
  ctx.registerTool(tools.deleteTool);
21
20
  logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.deleteTool.name}`);
21
+ ctx.registerTool(tools.lasDataProfilerTool);
22
+ logger.info(`[${new Date().toISOString()}] [ContextLake] Tool registered: ${tools.lasDataProfilerTool.name}`);
22
23
  }
23
24
  catch (error) {
24
- logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering agent tools: ${error.message}`, { stack: error.stack });
25
+ logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering agent tools: ${error.message}${error.stack ? '\\n' + error.stack : ''}`);
25
26
  throw error;
26
27
  }
27
28
  // Register CLI
@@ -43,31 +44,7 @@ function registerAll(ctx, logger) {
43
44
  .requiredOption('--bucket <bucket>', 'Bucket name (or local root directory for local vendor)')
44
45
  .requiredOption('--prefix <prefix>', 'Path prefix to limit scan scope')
45
46
  .option('--sample-rows <number>', 'Number of rows to sample per structured file', '100')
46
- .action(async (datasource_name, options) => {
47
- try {
48
- const params = {
49
- datasource_name,
50
- vendor: options.vendor,
51
- endpoint: options.endpoint,
52
- access_key: options.ak,
53
- secret_key: options.sk,
54
- region: options.region,
55
- bucket: options.bucket,
56
- prefix: options.prefix,
57
- sample_rows: parseInt(options.sampleRows),
58
- };
59
- console.log(`[contextlake connect] Connecting to datasource "${datasource_name}"...`);
60
- console.log(` vendor: ${params.vendor}`);
61
- console.log(` bucket: ${params.bucket}`);
62
- console.log(` prefix: ${params.prefix}`);
63
- const result = await (0, las_data_profiler_1.connectDataSource)(params);
64
- console.log(JSON.stringify(result, null, 2));
65
- }
66
- catch (e) {
67
- console.error('Error:', e.message);
68
- process.exitCode = 1;
69
- }
70
- });
47
+ .action(commands.connectAction);
71
48
  // Ingest
72
49
  contextlake.command('ingest <files...>')
73
50
  .description('Ingest one or more files into the knowledge base')
@@ -97,7 +74,7 @@ function registerAll(ctx, logger) {
97
74
  logger.info(`[${new Date().toISOString()}] [ContextLake] CLI commands registered`);
98
75
  }
99
76
  catch (error) {
100
- logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering CLI commands: ${error.message}`, { stack: error.stack });
77
+ logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering CLI commands: ${error.message}${error.stack ? '\\n' + error.stack : ''}`);
101
78
  throw error;
102
79
  }
103
80
  // Register Slash Commands
@@ -134,6 +111,6 @@ function registerAll(ctx, logger) {
134
111
  logger.info(`[${new Date().toISOString()}] [ContextLake] Slash commands registered`);
135
112
  }
136
113
  catch (error) {
137
- logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering Slash commands: ${error.message}`, { stack: error.stack });
114
+ logger.error(`[${new Date().toISOString()}] [ContextLake] Error registering Slash commands: ${error.message}${error.stack ? '\\n' + error.stack : ''}`);
138
115
  }
139
116
  }
package/dist/src/commands/slashcmd.d.ts CHANGED
@@ -1,4 +1,5 @@
1
- export declare function getSlashCommands(pluginConfig: any, logger: any): {
1
+ import { ContextLakeConfig } from '../utils/config';
2
+ export declare function getSlashCommands(pluginConfig: ContextLakeConfig, logger: any): {
2
3
  ingestHandler: (commandCtx: any) => Promise<{
3
4
  text: string;
4
5
  }>;
package/dist/src/commands/tools.d.ts CHANGED
@@ -1,219 +1,9 @@
1
- export declare function getAgentTools(pluginConfig: any, logger: any): {
2
- ingestTool: {
3
- name: string;
4
- label: string;
5
- description: string;
6
- parameters: {
7
- type: string;
8
- properties: {
9
- files: {
10
- type: string;
11
- items: {
12
- type: string;
13
- };
14
- description: string;
15
- };
16
- metadata: {
17
- type: string;
18
- description: string;
19
- additionalProperties: boolean;
20
- };
21
- chunkSize: {
22
- type: string;
23
- description: string;
24
- };
25
- overlap: {
26
- type: string;
27
- description: string;
28
- };
29
- };
30
- required: string[];
31
- additionalProperties: boolean;
32
- };
33
- schema: {
34
- type: string;
35
- properties: {
36
- files: {
37
- type: string;
38
- items: {
39
- type: string;
40
- };
41
- description: string;
42
- };
43
- metadata: {
44
- type: string;
45
- description: string;
46
- additionalProperties: boolean;
47
- };
48
- chunkSize: {
49
- type: string;
50
- description: string;
51
- };
52
- overlap: {
53
- type: string;
54
- description: string;
55
- };
56
- };
57
- required: string[];
58
- additionalProperties: boolean;
59
- };
60
- execute(toolCallId: string, params: any): Promise<{
61
- success: boolean;
62
- result: any;
63
- error?: undefined;
64
- } | {
65
- success: boolean;
66
- error: any;
67
- result?: undefined;
68
- }>;
69
- };
70
- retrieveTool: {
71
- name: string;
72
- label: string;
73
- description: string;
74
- parameters: {
75
- type: string;
76
- properties: {
77
- query: {
78
- type: string;
79
- description: string;
80
- };
81
- top_k: {
82
- type: string;
83
- description: string;
84
- };
85
- filter: {
86
- type: string;
87
- description: string;
88
- };
89
- include_binary: {
90
- type: string;
91
- description: string;
92
- };
93
- };
94
- required: string[];
95
- additionalProperties: boolean;
96
- };
97
- schema: {
98
- type: string;
99
- properties: {
100
- query: {
101
- type: string;
102
- description: string;
103
- };
104
- top_k: {
105
- type: string;
106
- description: string;
107
- };
108
- filter: {
109
- type: string;
110
- description: string;
111
- };
112
- include_binary: {
113
- type: string;
114
- description: string;
115
- };
116
- };
117
- required: string[];
118
- additionalProperties: boolean;
119
- };
120
- execute(toolCallId: string, params: any): Promise<{
121
- success: boolean;
122
- result: any[];
123
- error?: undefined;
124
- } | {
125
- success: boolean;
126
- error: any;
127
- result?: undefined;
128
- }>;
129
- };
130
- listTool: {
131
- name: string;
132
- label: string;
133
- description: string;
134
- parameters: {
135
- type: string;
136
- properties: {
137
- limit: {
138
- type: string;
139
- description: string;
140
- };
141
- };
142
- required: never[];
143
- additionalProperties: boolean;
144
- };
145
- schema: {
146
- type: string;
147
- properties: {
148
- limit: {
149
- type: string;
150
- description: string;
151
- };
152
- };
153
- required: never[];
154
- };
155
- execute(toolCallId: string, params: any): Promise<{
156
- success: boolean;
157
- result: any;
158
- error?: undefined;
159
- } | {
160
- success: boolean;
161
- error: any;
162
- result?: undefined;
163
- }>;
164
- };
165
- deleteTool: {
166
- name: string;
167
- label: string;
168
- description: string;
169
- parameters: {
170
- type: string;
171
- properties: {
172
- file_ids: {
173
- type: string;
174
- items: {
175
- type: string;
176
- };
177
- description: string;
178
- };
179
- filter: {
180
- type: string;
181
- description: string;
182
- };
183
- };
184
- required: never[];
185
- additionalProperties: boolean;
186
- };
187
- schema: {
188
- type: string;
189
- properties: {
190
- file_ids: {
191
- type: string;
192
- items: {
193
- type: string;
194
- };
195
- description: string;
196
- };
197
- filter: {
198
- type: string;
199
- description: string;
200
- };
201
- };
202
- required: never[];
203
- };
204
- execute(toolCallId: string, params: any): Promise<{
205
- success: boolean;
206
- result: {
207
- status: string;
208
- message: string;
209
- deleted_count: number;
210
- storage_cleaned: number;
211
- };
212
- error?: undefined;
213
- } | {
214
- success: boolean;
215
- error: any;
216
- result?: undefined;
217
- }>;
218
- };
1
+ import { ContextLakeConfig } from '../utils/config';
2
+ import type { AnyAgentTool } from 'openclaw/plugin-sdk';
3
+ export declare function getAgentTools(pluginConfig: ContextLakeConfig, logger: any): {
4
+ ingestTool: AnyAgentTool;
5
+ retrieveTool: AnyAgentTool;
6
+ listTool: AnyAgentTool;
7
+ deleteTool: AnyAgentTool;
8
+ lasDataProfilerTool: AnyAgentTool;
219
9
  };