@prodisco/search-libs 0.1.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. package/LICENSE +21 -0
  2. package/README.md +379 -0
  3. package/dist/__tests__/extractor.test.d.ts +5 -0
  4. package/dist/__tests__/extractor.test.d.ts.map +1 -0
  5. package/dist/__tests__/extractor.test.js +452 -0
  6. package/dist/__tests__/extractor.test.js.map +1 -0
  7. package/dist/__tests__/library-indexer.test.d.ts +5 -0
  8. package/dist/__tests__/library-indexer.test.d.ts.map +1 -0
  9. package/dist/__tests__/library-indexer.test.js +611 -0
  10. package/dist/__tests__/library-indexer.test.js.map +1 -0
  11. package/dist/__tests__/schema.test.d.ts +5 -0
  12. package/dist/__tests__/schema.test.d.ts.map +1 -0
  13. package/dist/__tests__/schema.test.js +231 -0
  14. package/dist/__tests__/schema.test.js.map +1 -0
  15. package/dist/__tests__/script-parser.test.d.ts +5 -0
  16. package/dist/__tests__/script-parser.test.d.ts.map +1 -0
  17. package/dist/__tests__/script-parser.test.js +178 -0
  18. package/dist/__tests__/script-parser.test.js.map +1 -0
  19. package/dist/__tests__/search-engine.test.d.ts +5 -0
  20. package/dist/__tests__/search-engine.test.d.ts.map +1 -0
  21. package/dist/__tests__/search-engine.test.js +497 -0
  22. package/dist/__tests__/search-engine.test.js.map +1 -0
  23. package/dist/extractor/ast-parser.d.ts +48 -0
  24. package/dist/extractor/ast-parser.d.ts.map +1 -0
  25. package/dist/extractor/ast-parser.js +118 -0
  26. package/dist/extractor/ast-parser.js.map +1 -0
  27. package/dist/extractor/function-extractor.d.ts +20 -0
  28. package/dist/extractor/function-extractor.d.ts.map +1 -0
  29. package/dist/extractor/function-extractor.js +169 -0
  30. package/dist/extractor/function-extractor.js.map +1 -0
  31. package/dist/extractor/index.d.ts +22 -0
  32. package/dist/extractor/index.d.ts.map +1 -0
  33. package/dist/extractor/index.js +194 -0
  34. package/dist/extractor/index.js.map +1 -0
  35. package/dist/extractor/method-extractor.d.ts +30 -0
  36. package/dist/extractor/method-extractor.d.ts.map +1 -0
  37. package/dist/extractor/method-extractor.js +163 -0
  38. package/dist/extractor/method-extractor.js.map +1 -0
  39. package/dist/extractor/package-resolver.d.ts +77 -0
  40. package/dist/extractor/package-resolver.d.ts.map +1 -0
  41. package/dist/extractor/package-resolver.js +766 -0
  42. package/dist/extractor/package-resolver.js.map +1 -0
  43. package/dist/extractor/type-extractor.d.ts +15 -0
  44. package/dist/extractor/type-extractor.d.ts.map +1 -0
  45. package/dist/extractor/type-extractor.js +206 -0
  46. package/dist/extractor/type-extractor.js.map +1 -0
  47. package/dist/extractor/types.d.ts +116 -0
  48. package/dist/extractor/types.d.ts.map +1 -0
  49. package/dist/extractor/types.js +5 -0
  50. package/dist/extractor/types.js.map +1 -0
  51. package/dist/index.d.ts +18 -0
  52. package/dist/index.d.ts.map +1 -0
  53. package/dist/index.js +21 -0
  54. package/dist/index.js.map +1 -0
  55. package/dist/library-indexer.d.ts +104 -0
  56. package/dist/library-indexer.d.ts.map +1 -0
  57. package/dist/library-indexer.js +295 -0
  58. package/dist/library-indexer.js.map +1 -0
  59. package/dist/schema/base-schema.d.ts +63 -0
  60. package/dist/schema/base-schema.d.ts.map +1 -0
  61. package/dist/schema/base-schema.js +63 -0
  62. package/dist/schema/base-schema.js.map +1 -0
  63. package/dist/schema/index.d.ts +6 -0
  64. package/dist/schema/index.d.ts.map +1 -0
  65. package/dist/schema/index.js +6 -0
  66. package/dist/schema/index.js.map +1 -0
  67. package/dist/schema/schema-builder.d.ts +47 -0
  68. package/dist/schema/schema-builder.d.ts.map +1 -0
  69. package/dist/schema/schema-builder.js +236 -0
  70. package/dist/schema/schema-builder.js.map +1 -0
  71. package/dist/script/index.d.ts +6 -0
  72. package/dist/script/index.d.ts.map +1 -0
  73. package/dist/script/index.js +5 -0
  74. package/dist/script/index.js.map +1 -0
  75. package/dist/script/script-parser.d.ts +18 -0
  76. package/dist/script/script-parser.d.ts.map +1 -0
  77. package/dist/script/script-parser.js +246 -0
  78. package/dist/script/script-parser.js.map +1 -0
  79. package/dist/script/types.d.ts +32 -0
  80. package/dist/script/types.d.ts.map +1 -0
  81. package/dist/script/types.js +5 -0
  82. package/dist/script/types.js.map +1 -0
  83. package/dist/search/index.d.ts +7 -0
  84. package/dist/search/index.d.ts.map +1 -0
  85. package/dist/search/index.js +7 -0
  86. package/dist/search/index.js.map +1 -0
  87. package/dist/search/query-builder.d.ts +59 -0
  88. package/dist/search/query-builder.d.ts.map +1 -0
  89. package/dist/search/query-builder.js +103 -0
  90. package/dist/search/query-builder.js.map +1 -0
  91. package/dist/search/result-formatter.d.ts +61 -0
  92. package/dist/search/result-formatter.d.ts.map +1 -0
  93. package/dist/search/result-formatter.js +170 -0
  94. package/dist/search/result-formatter.js.map +1 -0
  95. package/dist/search/search-engine.d.ts +105 -0
  96. package/dist/search/search-engine.d.ts.map +1 -0
  97. package/dist/search/search-engine.js +245 -0
  98. package/dist/search/search-engine.js.map +1 -0
  99. package/package.json +41 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Prodisco
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,379 @@
1
+ # @prodisco/search-libs
2
+
3
+ A generic library indexing + search solution using [Orama](https://orama.com/). Extract types, methods, and functions from TypeScript libraries (via `.d.ts`) and **ESM JavaScript libraries** (best-effort), index TypeScript scripts, and provide unified structured search for AI agents.
4
+
5
+ ## Table of Contents
6
+
7
+ - [Features](#features)
8
+ - [Installation](#installation)
9
+ - [Quick Start](#quick-start)
10
+ - [API Reference](#api-reference)
11
+ - [Document Types](#document-types)
12
+ - [Architecture](#architecture)
13
+ - [How Library Indexing Works](#how-library-indexing-works-typescript--javascript)
14
+ - [Extending the Schema](#extending-the-schema)
15
+ - [License](#license)
16
+
17
+ ## Features
18
+
19
+ - **Generic Library Extraction**: Extract types (classes, interfaces, enums, type-aliases) and methods/functions from npm packages using TypeScript AST parsing (TypeScript `.d.ts` + ESM JavaScript fallback)
20
+ - **Script Indexing**: Index TypeScript scripts with automatic metadata extraction (description, keywords, API references)
21
+ - **Unified Search**: Search across types, methods, functions, and scripts with structured queries and structured output
22
+ - **Extensible Schema**: Base Orama schema with support for custom extensions
23
+ - **AI-Optimized**: Structured output designed for AI code generation agents
24
+
25
+ ## Installation
26
+
27
+ ```bash
28
+ npm install @prodisco/search-libs
29
+ ```
30
+
31
+ ## Quick Start
32
+
33
+ ```typescript
34
+ import { LibraryIndexer } from '@prodisco/search-libs';
35
+
36
+ // Create indexer with packages to extract
37
+ const indexer = new LibraryIndexer({
38
+ packages: [
39
+ { name: '@kubernetes/client-node' },
40
+ { name: '@prodisco/prometheus-client' },
41
+ { name: 'simple-statistics' },
42
+ ],
43
+ });
44
+
45
+ // Initialize - extracts and indexes all packages
46
+ await indexer.initialize();
47
+
48
+ // Search across all indexed content
49
+ const results = await indexer.search({
50
+ query: 'Pod',
51
+ documentType: 'type',
52
+ limit: 10,
53
+ });
54
+
55
+ console.log(results.results[0]);
56
+ // {
57
+ // id: 'type:@kubernetes/client-node:V1Pod',
58
+ // documentType: 'type',
59
+ // name: 'V1Pod',
60
+ // library: '@kubernetes/client-node',
61
+ // category: 'interface',
62
+ // description: 'Pod is a collection of containers...',
63
+ // properties: [...],
64
+ // typeKind: 'interface',
65
+ // }
66
+ ```
67
+
68
+ ## API Reference
69
+
70
+ ### LibraryIndexer
71
+
72
+ The main entry point for indexing and searching.
73
+
74
+ ```typescript
75
+ interface LibraryIndexerOptions {
76
+ packages: PackageConfig[];
77
+ basePath?: string; // Defaults to process.cwd()
78
+ }
79
+
80
+ interface PackageConfig {
81
+ name: string; // npm package name
82
+ typeFilter?: RegExp | ((name: string) => boolean);
83
+ methodFilter?: RegExp | ((name: string) => boolean);
84
+ }
85
+ ```
86
+
87
+ #### Methods
88
+
89
+ ##### `initialize(): Promise<{ indexed: number; errors: ExtractionError[] }>`
90
+
91
+ Extracts and indexes all configured packages.
92
+
93
+ ##### `search(options: SearchOptions): Promise<SearchResult>`
94
+
95
+ Search the index with structured queries.
96
+
97
+ ```typescript
98
+ interface SearchOptions {
99
+ query?: string; // Full-text search term
100
+ documentType?: string; // 'type' | 'method' | 'function' | 'script' | 'all'
101
+ category?: string; // Filter by category
102
+ library?: string; // Filter by library
103
+ limit?: number; // Max results (default: 10)
104
+ offset?: number; // Pagination offset
105
+ }
106
+
107
+ interface SearchResult {
108
+ results: IndexedDocument[];
109
+ totalMatches: number;
110
+ facets: {
111
+ documentType: Record<string, number>;
112
+ library: Record<string, number>;
113
+ category: Record<string, number>;
114
+ };
115
+ searchTime: number;
116
+ }
117
+ ```
118
+
119
+ ##### `addScript(filePath: string): Promise<void>`
120
+
121
+ Add a TypeScript script to the index. The script is automatically parsed to extract:
122
+ - Description (from first comment block)
123
+ - Keywords (from description)
124
+ - Resource types (from filename and content AST)
125
+ - API references (from content AST)
126
+
127
+ ##### `addScriptsFromDirectory(dirPath: string): Promise<void>`
128
+
129
+ Add all TypeScript scripts from a directory.
130
+
131
+ ##### `removeScript(filePath: string): Promise<void>`
132
+
133
+ Remove a script from the index.
134
+
135
+ ##### `addDocuments(docs: IndexedDocument[]): Promise<void>`
136
+
137
+ Add custom documents to the index (e.g., from external sources).
138
+
139
+ ##### `shutdown(): Promise<void>`
140
+
141
+ Clean up resources.
142
+
143
+ ## Document Types
144
+
145
+ ### Type Documents
146
+
147
+ Extracted from `.d.ts` files (preferred). If no `.d.ts` is found, types/classes can be extracted from ESM JavaScript source (`.js/.mjs`) as a best-effort fallback (parameter/return types default to `any`).
148
+
149
+ ```typescript
150
+ {
151
+ id: 'type:@kubernetes/client-node:V1Pod',
152
+ documentType: 'type',
153
+ name: 'V1Pod',
154
+ library: '@kubernetes/client-node',
155
+ category: 'interface',
156
+ description: 'Pod is a collection of containers...',
157
+ properties: [
158
+ { name: 'metadata', type: 'V1ObjectMeta', optional: true },
159
+ { name: 'spec', type: 'V1PodSpec', optional: true },
160
+ ],
161
+ typeKind: 'interface',
162
+ nestedTypes: ['V1ObjectMeta', 'V1PodSpec'],
163
+ }
164
+ ```
165
+
166
+ ### Method Documents
167
+
168
+ Extracted from class methods:
169
+
170
+ ```typescript
171
+ {
172
+ id: 'method:@kubernetes/client-node:CoreV1Api:listNamespacedPod',
173
+ documentType: 'method',
174
+ name: 'listNamespacedPod',
175
+ library: '@kubernetes/client-node',
176
+ category: 'list',
177
+ description: 'List pods in a namespace',
178
+ parameters: [
179
+ { name: 'namespace', type: 'string', optional: false },
180
+ ],
181
+ returnType: 'Promise<V1PodList>',
182
+ signature: 'listNamespacedPod(namespace: string): Promise<V1PodList>',
183
+ }
184
+ ```
185
+
186
+ ### Script Documents
187
+
188
+ Indexed from TypeScript files:
189
+
190
+ ```typescript
191
+ {
192
+ id: 'script:get-pod-logs.ts',
193
+ documentType: 'script',
194
+ name: 'get-pod-logs',
195
+ library: 'CachedScript',
196
+ category: 'script',
197
+ description: 'Retrieves logs from a Kubernetes pod',
198
+ filePath: '/path/to/scripts/get-pod-logs.ts',
199
+ keywords: 'logs pod kubernetes',
200
+ }
201
+ ```
202
+
203
+ ## Architecture
204
+
205
+ ```
206
+ search-libs/
207
+ ├── extractor/ # TypeScript AST extraction
208
+ │ ├── type-extractor # Extract classes, interfaces, enums
209
+ │ ├── method-extractor # Extract methods from classes
210
+ │ ├── function-extractor # Extract standalone functions
211
+ │ └── package-resolver # Find .d.ts files or ESM JS entrypoints in node_modules
212
+ ├── script/ # Script parsing
213
+ │ └── script-parser # Parse scripts for metadata
214
+ ├── schema/ # Orama schema
215
+ │ ├── base-schema # Core schema fields
216
+ │ └── schema-builder # Extensibility
217
+ └── search/ # Search engine
218
+ ├── search-engine # Orama wrapper
219
+ ├── query-builder # Fluent query API
220
+ └── result-formatter # Format for AI consumption
221
+ ```
222
+
223
+ ## How Library Indexing Works (TypeScript + JavaScript)
224
+
225
+ This section explains what happens when you call `LibraryIndexer.initialize()` for a package in `node_modules`, and how `search-libs` decides whether to index from **TypeScript declarations** or **JavaScript source**.
226
+
227
+ ### High-level flow
228
+
229
+ At a high level, indexing a package looks like:
230
+
231
+ - **Resolve package folder**: `basePath/node_modules/<packageName>/`
232
+ - **Decide extraction strategy**:
233
+ - Prefer **TypeScript declarations** (`.d.ts`) when discoverable
234
+ - Otherwise, attempt **ESM JavaScript source fallback** (`.js/.mjs`)
235
+ - **Extract documents**:
236
+ - Types (classes/interfaces/enums/type-aliases)
237
+ - Methods (class methods)
238
+ - Functions (standalone functions)
239
+ - **Insert into Orama** and expose them via `search()`
240
+
241
+ ### TypeScript packages (declaration-first)
242
+
243
+ For TypeScript libraries (or JS libraries that ship `.d.ts`), extraction is **declaration-first**:
244
+
245
+ #### 1) Finding `.d.ts` files
246
+
247
+ `search-libs` attempts to locate a main `.d.ts` and then scans for additional `.d.ts` files:
248
+
249
+ - **Main declaration** candidates:
250
+ - `package.json` `"types"` / `"typings"`
251
+ - `package.json` `"exports"["."]["types"]`
252
+ - common fallbacks like `dist/index.d.ts`, `lib/index.d.ts`, `index.d.ts`
253
+ - **Additional declarations**:
254
+ - Walks the package’s `types/`, `typings/`, `dist/`, `lib/`, and `src/` trees (bounded depth)
255
+ - Skips common test/internal files (e.g. `*.test.*`, `*.spec.*`, names containing `__`)
256
+
257
+ #### 2) Understanding what’s “public”
258
+
259
+ Some packages have internal class names that are re-exported or aliased at the entrypoint. To reduce noise for method indexing, `search-libs` parses the package’s **main `.d.ts`** and builds:
260
+
261
+ - **Public export set**: the names users can import
262
+ - **Alias map**: internal names → public names (e.g. `ObjectCoreV1Api` → `CoreV1Api`)
263
+
264
+ It follows `export * from './x'` chains (relative only) to build a more complete public view.
265
+
266
+ #### 3) Extracting types / methods / functions
267
+
268
+ Once `.d.ts` files are discovered, each file is parsed into a TypeScript compiler AST, from which `search-libs` extracts:
269
+
270
+ - **Types**: `class`, `interface`, `enum`, and simple `type` aliases
271
+ - Properties are captured as text (type strings from `.d.ts`)
272
+ - Nested type references are detected for better searchability
273
+ - **Methods**:
274
+ - Extracted from class declarations
275
+ - If a public export set exists, methods are indexed only for **publicly exported classes**
276
+ - Aliases are applied so class names match what users import
277
+ - **Functions**:
278
+ - Extracts function declarations and exported function-valued variables
279
+
280
+ Notes:
281
+
282
+ - Types are extracted from all discovered `.d.ts` files (this often includes internal-but-useful helper types).
283
+ - `LibraryIndexer` can expand complex parameter/return types by looking up extracted types and embedding a compact definition in method docs.
284
+
285
+ ### JavaScript packages (ESM source fallback)
286
+
287
+ If **no `.d.ts` files are discoverable**, `search-libs` attempts to index the package’s **ESM JavaScript source**.
288
+
289
+ #### 1) Finding an ESM entry file
290
+
291
+ Entry resolution is based on `package.json` and common build layouts:
292
+
293
+ - Prefer `exports['.']` with `"import"` (then `"default"`)
294
+ - Then `"module"`
295
+ - Then `"main"` **only when** the package has `"type": "module"` (for `.js`)
296
+ - Plus common fallbacks (`dist/index.js`, `lib/index.js`, `index.js`, and `.mjs` variants)
297
+
298
+ Only `.js` (ESM via `"type":"module"`) and `.mjs` are considered. CommonJS (`.cjs`) is intentionally ignored.
299
+
300
+ #### 2) Computing the public surface (exports)
301
+
302
+ JavaScript libraries often re-export from multiple files. To avoid indexing internal helpers, `search-libs` first computes the **public export surface** by traversing the entry’s static export graph (relative only).
303
+
304
+ Supported patterns include:
305
+
306
+ - `export { a, b as c } from './x.js'`
307
+ - `export * from './x.js'` (does not re-export `default`)
308
+ - `export { a, b as c }` (local exports)
309
+ - import + re-export:
310
+
311
+ ```js
312
+ import { foo as localFoo } from './x.js';
313
+ export { localFoo as foo };
314
+ ```
315
+
316
+ - direct exports:
317
+ - `export function foo() {}`
318
+ - `export class Foo {}`
319
+ - `export const foo = () => {}`
320
+ - `export default <identifier>` (best-effort; indexed under the name `default` when resolvable)
321
+
322
+ From this traversal, `search-libs` builds:
323
+
324
+ - a **per-file allowlist** of declaration names that are actually part of the public API
325
+ - a **per-file alias map** for renamed exports (`internalFn` → `publicFn`)
326
+
327
+ Only relative (`./...`) re-exports are followed. Non-relative re-exports (from dependencies) are ignored.
328
+
329
+ #### 3) Extracting from JavaScript source
330
+
331
+ For each JS module that contributes exports, `search-libs` runs the same AST extractors as TypeScript, but applies the allowlist/aliases so only public symbols are indexed:
332
+
333
+ - **Exported functions**: indexed with parameter/return types defaulting to `any`
334
+ - **Exported classes**: indexed as type documents, and their methods are indexed as method documents
335
+ - **Descriptions**: pulled from JSDoc comment blocks when present (e.g. `/** ... */`)
336
+
337
+ ### Filters and tuning
338
+
339
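+ You can control noise and focus via the following `PackageConfig` options (note that `classFilter` is not listed in the `PackageConfig` interface shown under API Reference):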
340
+
341
+ - `typeFilter`: include only matching type names
342
+ - `methodFilter`: include only matching method/function names
343
+ - `classFilter`: include methods only from matching class names (applies to the public/aliased class name)
344
+
345
+ ### Limitations (by design)
346
+
347
+ - **CommonJS** (`module.exports`, `exports.*`) is not supported by the JS fallback.
348
+ - **Dynamic exports** are not supported (computed exports, runtime mutation, etc.).
349
+ - **Re-exports from dependencies** (non-relative specifiers like `'lodash'`) are ignored by the JS fallback.
350
+ - JS fallback is **best-effort**: it parses syntax but does not run a type checker; parameter/return types default to `any`.
351
+
352
+ ### Tips for best results
353
+
354
+ - If you can, ship `.d.ts` (or add `@types/<pkg>`): declaration-first indexing produces richer type signatures.
355
+ - For JS-only ESM libraries:
356
+ - Prefer **static named exports** over dynamic export patterns
357
+ - Add **JSDoc descriptions** on exported functions/classes/methods to improve search quality
358
+ - Keep exports **shallow and explicit** at the entrypoint for a clearer public surface
359
+
360
+ ## Extending the Schema
361
+
362
+ For domain-specific fields, use the schema builder:
363
+
364
+ ```typescript
365
+ import { buildSchema, SearchEngine } from '@prodisco/search-libs';
366
+
367
+ const customSchema = buildSchema({
368
+ extensions: {
369
+ customField: 'string',
370
+ customEnum: 'enum',
371
+ },
372
+ });
373
+
374
+ const engine = new SearchEngine({ schema: customSchema });
375
+ ```
376
+
377
+ ## License
378
+
379
+ MIT
package/dist/__tests__/extractor.test.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ /**
2
+ * Tests for the extractor module
3
+ */
4
+ export {};
5
+ //# sourceMappingURL=extractor.test.d.ts.map
package/dist/__tests__/extractor.test.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"extractor.test.d.ts","sourceRoot":"","sources":["../../src/__tests__/extractor.test.ts"],"names":[],"mappings":"AAAA;;GAEG"}