@robthepcguy/rag-vault 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/LICENSE +24 -0
  2. package/README.md +421 -0
  3. package/dist/bin/install-skills.d.ts +20 -0
  4. package/dist/bin/install-skills.d.ts.map +1 -0
  5. package/dist/bin/install-skills.js +196 -0
  6. package/dist/bin/install-skills.js.map +1 -0
  7. package/dist/chunker/index.d.ts +11 -0
  8. package/dist/chunker/index.d.ts.map +1 -0
  9. package/dist/chunker/index.js +6 -0
  10. package/dist/chunker/index.js.map +1 -0
  11. package/dist/chunker/semantic-chunker.d.ts +96 -0
  12. package/dist/chunker/semantic-chunker.d.ts.map +1 -0
  13. package/dist/chunker/semantic-chunker.js +267 -0
  14. package/dist/chunker/semantic-chunker.js.map +1 -0
  15. package/dist/chunker/sentence-splitter.d.ts +16 -0
  16. package/dist/chunker/sentence-splitter.d.ts.map +1 -0
  17. package/dist/chunker/sentence-splitter.js +114 -0
  18. package/dist/chunker/sentence-splitter.js.map +1 -0
  19. package/dist/embedder/index.d.ts +55 -0
  20. package/dist/embedder/index.d.ts.map +1 -0
  21. package/dist/embedder/index.js +146 -0
  22. package/dist/embedder/index.js.map +1 -0
  23. package/dist/errors/index.d.ts +73 -0
  24. package/dist/errors/index.d.ts.map +1 -0
  25. package/dist/errors/index.js +170 -0
  26. package/dist/errors/index.js.map +1 -0
  27. package/dist/index.d.ts +3 -0
  28. package/dist/index.d.ts.map +1 -0
  29. package/dist/index.js +91 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/parser/html-parser.d.ts +14 -0
  32. package/dist/parser/html-parser.d.ts.map +1 -0
  33. package/dist/parser/html-parser.js +99 -0
  34. package/dist/parser/html-parser.js.map +1 -0
  35. package/dist/parser/index.d.ts +144 -0
  36. package/dist/parser/index.d.ts.map +1 -0
  37. package/dist/parser/index.js +446 -0
  38. package/dist/parser/index.js.map +1 -0
  39. package/dist/parser/pdf-filter.d.ts +89 -0
  40. package/dist/parser/pdf-filter.d.ts.map +1 -0
  41. package/dist/parser/pdf-filter.js +304 -0
  42. package/dist/parser/pdf-filter.js.map +1 -0
  43. package/dist/server/index.d.ts +144 -0
  44. package/dist/server/index.d.ts.map +1 -0
  45. package/dist/server/index.js +518 -0
  46. package/dist/server/index.js.map +1 -0
  47. package/dist/server/raw-data-utils.d.ts +81 -0
  48. package/dist/server/raw-data-utils.d.ts.map +1 -0
  49. package/dist/server/raw-data-utils.js +196 -0
  50. package/dist/server/raw-data-utils.js.map +1 -0
  51. package/dist/server/schemas.d.ts +186 -0
  52. package/dist/server/schemas.d.ts.map +1 -0
  53. package/dist/server/schemas.js +99 -0
  54. package/dist/server/schemas.js.map +1 -0
  55. package/dist/utils/config-parsers.d.ts +14 -0
  56. package/dist/utils/config-parsers.d.ts.map +1 -0
  57. package/dist/utils/config-parsers.js +47 -0
  58. package/dist/utils/config-parsers.js.map +1 -0
  59. package/dist/utils/config.d.ts +37 -0
  60. package/dist/utils/config.d.ts.map +1 -0
  61. package/dist/utils/config.js +52 -0
  62. package/dist/utils/config.js.map +1 -0
  63. package/dist/utils/logger.d.ts +36 -0
  64. package/dist/utils/logger.d.ts.map +1 -0
  65. package/dist/utils/logger.js +64 -0
  66. package/dist/utils/logger.js.map +1 -0
  67. package/dist/utils/math.d.ts +34 -0
  68. package/dist/utils/math.d.ts.map +1 -0
  69. package/dist/utils/math.js +73 -0
  70. package/dist/utils/math.js.map +1 -0
  71. package/dist/utils/process-handlers.d.ts +26 -0
  72. package/dist/utils/process-handlers.d.ts.map +1 -0
  73. package/dist/utils/process-handlers.js +69 -0
  74. package/dist/utils/process-handlers.js.map +1 -0
  75. package/dist/vectordb/index.d.ts +210 -0
  76. package/dist/vectordb/index.d.ts.map +1 -0
  77. package/dist/vectordb/index.js +613 -0
  78. package/dist/vectordb/index.js.map +1 -0
  79. package/dist/web/api-routes.d.ts +9 -0
  80. package/dist/web/api-routes.d.ts.map +1 -0
  81. package/dist/web/api-routes.js +127 -0
  82. package/dist/web/api-routes.js.map +1 -0
  83. package/dist/web/config-routes.d.ts +7 -0
  84. package/dist/web/config-routes.d.ts.map +1 -0
  85. package/dist/web/config-routes.js +54 -0
  86. package/dist/web/config-routes.js.map +1 -0
  87. package/dist/web/database-manager.d.ts +130 -0
  88. package/dist/web/database-manager.d.ts.map +1 -0
  89. package/dist/web/database-manager.js +382 -0
  90. package/dist/web/database-manager.js.map +1 -0
  91. package/dist/web/http-server.d.ts +28 -0
  92. package/dist/web/http-server.d.ts.map +1 -0
  93. package/dist/web/http-server.js +311 -0
  94. package/dist/web/http-server.js.map +1 -0
  95. package/dist/web/index.d.ts +3 -0
  96. package/dist/web/index.d.ts.map +1 -0
  97. package/dist/web/index.js +114 -0
  98. package/dist/web/index.js.map +1 -0
  99. package/dist/web/middleware/async-handler.d.ts +17 -0
  100. package/dist/web/middleware/async-handler.d.ts.map +1 -0
  101. package/dist/web/middleware/async-handler.js +26 -0
  102. package/dist/web/middleware/async-handler.js.map +1 -0
  103. package/dist/web/middleware/auth.d.ts +22 -0
  104. package/dist/web/middleware/auth.d.ts.map +1 -0
  105. package/dist/web/middleware/auth.js +81 -0
  106. package/dist/web/middleware/auth.js.map +1 -0
  107. package/dist/web/middleware/error-handler.d.ts +36 -0
  108. package/dist/web/middleware/error-handler.d.ts.map +1 -0
  109. package/dist/web/middleware/error-handler.js +68 -0
  110. package/dist/web/middleware/error-handler.js.map +1 -0
  111. package/dist/web/middleware/index.d.ts +6 -0
  112. package/dist/web/middleware/index.d.ts.map +1 -0
  113. package/dist/web/middleware/index.js +19 -0
  114. package/dist/web/middleware/index.js.map +1 -0
  115. package/dist/web/middleware/rate-limit.d.ts +38 -0
  116. package/dist/web/middleware/rate-limit.d.ts.map +1 -0
  117. package/dist/web/middleware/rate-limit.js +116 -0
  118. package/dist/web/middleware/rate-limit.js.map +1 -0
  119. package/dist/web/middleware/request-logger.d.ts +52 -0
  120. package/dist/web/middleware/request-logger.d.ts.map +1 -0
  121. package/dist/web/middleware/request-logger.js +74 -0
  122. package/dist/web/middleware/request-logger.js.map +1 -0
  123. package/dist/web/types.d.ts +6 -0
  124. package/dist/web/types.d.ts.map +1 -0
  125. package/dist/web/types.js +4 -0
  126. package/dist/web/types.js.map +1 -0
  127. package/package.json +135 -0
  128. package/skills/rag-vault/SKILL.md +111 -0
  129. package/skills/rag-vault/references/html-ingestion.md +73 -0
  130. package/skills/rag-vault/references/query-optimization.md +57 -0
  131. package/skills/rag-vault/references/result-refinement.md +54 -0
@@ -0,0 +1,146 @@
1
+ "use strict";
2
+ // Embedder implementation with Transformers.js
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.Embedder = exports.EmbeddingError = void 0;
5
+ const transformers_1 = require("@huggingface/transformers");
6
+ const index_js_1 = require("../errors/index.js");
7
+ // Re-export error class for backwards compatibility
8
+ var index_js_2 = require("../errors/index.js");
9
+ Object.defineProperty(exports, "EmbeddingError", { enumerable: true, get: function () { return index_js_2.EmbeddingError; } });
10
+ // ============================================
11
+ // Embedder Class
12
+ // ============================================
13
/**
 * Extract a printable message from any thrown value.
 *
 * JavaScript allows throwing non-Error values (strings, plain objects), in
 * which case `.message` is undefined and would render as "undefined".
 *
 * @param error - Value caught in a `catch` clause
 * @returns The Error's message, or the stringified value for non-Errors
 */
function errorMessageOf(error) {
    return error instanceof Error ? error.message : String(error);
}
/**
 * Return the caught value when it is a real Error so it can be attached as
 * the `cause` of a wrapping error; non-Error values yield `undefined`.
 *
 * @param error - Value caught in a `catch` clause
 * @returns The Error itself, or undefined
 */
function errorCauseOf(error) {
    return error instanceof Error ? error : undefined;
}
/**
 * Embedding generation class using Transformers.js
 *
 * Responsibilities:
 * - Generate embedding vectors (dimension depends on model)
 * - Transformers.js wrapper
 * - Batch processing (batch size taken from `config.batchSize`)
 *
 * Config fields read by this class: `modelPath`, `cacheDir`, `batchSize`.
 * All progress messages are written with `console.error`.
 */
class Embedder {
    constructor(config) {
        // Loaded feature-extraction pipeline; stays null until initialize() succeeds.
        this.model = null;
        // In-flight lazy initialization, shared so concurrent callers await the same load.
        this.initPromise = null;
        this.config = config;
    }
    /**
     * Get the model name/path
     *
     * @returns `config.modelPath`
     */
    getModelName() {
        return this.config.modelPath;
    }
    /**
     * Initialize Transformers.js model
     *
     * No-op when a model is already loaded.
     *
     * @throws EmbeddingError when the pipeline cannot be created
     */
    async initialize() {
        // Skip if already initialized
        if (this.model) {
            return;
        }
        try {
            // Set cache directory BEFORE creating pipeline
            transformers_1.env.cacheDir = this.config.cacheDir;
            console.error(`Embedder: Setting cache directory to "${this.config.cacheDir}"`);
            console.error(`Embedder: Loading model "${this.config.modelPath}"...`);
            this.model = await (0, transformers_1.pipeline)('feature-extraction', this.config.modelPath);
            console.error('Embedder: Model loaded successfully');
        }
        catch (error) {
            throw new index_js_1.EmbeddingError(`Failed to initialize Embedder: ${errorMessageOf(error)}`, errorCauseOf(error));
        }
    }
    /**
     * Ensure model is initialized (lazy initialization)
     * This method is called automatically by embed() and embedBatch()
     *
     * @throws EmbeddingError with troubleshooting guidance when loading fails
     */
    async ensureInitialized() {
        // Already initialized
        if (this.model) {
            return;
        }
        // Initialization already in progress, wait for it
        if (this.initPromise) {
            await this.initPromise;
            return;
        }
        // Start initialization
        console.error('Embedder: First use detected. Initializing model (downloading ~90MB, may take 1-2 minutes)...');
        this.initPromise = this.initialize().catch((error) => {
            // Clear initPromise on failure to allow retry
            this.initPromise = null;
            // Enhance error message with detailed guidance
            throw new index_js_1.EmbeddingError(`Failed to initialize embedder on first use: ${errorMessageOf(error)}\n\nPossible causes:\n • Network connectivity issues during model download\n • Insufficient disk space (need ~90MB)\n • Corrupted model cache\n\nRecommended actions:\n 1. Check your internet connection and try again\n 2. Ensure sufficient disk space is available\n 3. If problem persists, delete cache: ${this.config.cacheDir}\n 4. Then retry your query\n`, errorCauseOf(error));
        });
        await this.initPromise;
    }
    /**
     * Convert single text to embedding vector
     *
     * @param text - Text
     * @returns Embedding vector (dimension depends on model)
     * @throws EmbeddingError for empty text, initialization failure, or model failure
     */
    async embed(text) {
        try {
            // Fail-fast for empty string BEFORE lazy initialization, so an
            // invalid call never triggers a model load/download.
            if (text.length === 0) {
                throw new index_js_1.EmbeddingError('Cannot generate embedding for empty text');
            }
            // Lazy initialization: initialize on first use if not already initialized
            await this.ensureInitialized();
            const options = { pooling: 'mean', normalize: true };
            // Invoke through a local so the pipeline is called as a plain function.
            const modelCall = this.model;
            const output = await modelCall(text, options);
            // Access raw data via .data property and copy it into a plain array
            return Array.from(output.data);
        }
        catch (error) {
            if (error instanceof index_js_1.EmbeddingError) {
                throw error;
            }
            throw new index_js_1.EmbeddingError(`Failed to generate embedding: ${errorMessageOf(error)}`, errorCauseOf(error));
        }
    }
    /**
     * Convert multiple texts to embedding vectors with batch processing
     *
     * Texts within one batch are embedded concurrently; batches run one after
     * another, and cancellation is checked before each batch starts.
     *
     * @param texts - Array of texts
     * @param signal - Optional AbortSignal for cancellation support
     * @returns Array of embedding vectors (dimension depends on model), in input order
     * @throws EmbeddingError for an invalid batch size, cancellation, or any embedding failure
     */
    async embedBatch(texts, signal) {
        // Nothing to embed: return before touching the model at all.
        if (texts.length === 0) {
            return [];
        }
        // A batch size of 0 would loop forever and a non-numeric one would
        // silently yield no embeddings, so reject it explicitly.
        const configuredSize = this.config.batchSize;
        if (!Number.isFinite(configuredSize) || configuredSize < 1) {
            throw new index_js_1.EmbeddingError(`Invalid embedding batch size: ${String(configuredSize)} (expected a number >= 1)`);
        }
        const batchSize = Math.floor(configuredSize);
        // Lazy initialization: initialize on first use if not already initialized
        await this.ensureInitialized();
        try {
            const embeddings = [];
            // Process in batches according to batch size
            for (let i = 0; i < texts.length; i += batchSize) {
                // Check for cancellation before each batch
                if (signal?.aborted) {
                    throw new index_js_1.EmbeddingError('Embedding operation was cancelled');
                }
                const batch = texts.slice(i, i + batchSize);
                const batchEmbeddings = await Promise.all(batch.map((text) => this.embed(text)));
                embeddings.push(...batchEmbeddings);
            }
            return embeddings;
        }
        catch (error) {
            if (error instanceof index_js_1.EmbeddingError) {
                throw error;
            }
            throw new index_js_1.EmbeddingError(`Failed to generate batch embeddings: ${errorMessageOf(error)}`, errorCauseOf(error));
        }
    }
}
145
+ exports.Embedder = Embedder;
146
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/embedder/index.ts"],"names":[],"mappings":";AAAA,+CAA+C;;;AAE/C,4DAAyD;AACzD,iDAAmD;AAEnD,oDAAoD;AACpD,+CAAmD;AAA1C,0GAAA,cAAc,OAAA;AAkBvB,+CAA+C;AAC/C,iBAAiB;AACjB,+CAA+C;AAE/C;;;;;;;GAOG;AACH,MAAa,QAAQ;IAMnB,YAAY,MAAsB;QALlC,2EAA2E;QACnE,UAAK,GAAY,IAAI,CAAA;QACrB,gBAAW,GAAyB,IAAI,CAAA;QAI9C,IAAI,CAAC,MAAM,GAAG,MAAM,CAAA;IACtB,CAAC;IAED;;OAEG;IACH,YAAY;QACV,OAAO,IAAI,CAAC,MAAM,CAAC,SAAS,CAAA;IAC9B,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU;QACd,8BAA8B;QAC9B,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,IAAI,CAAC;YACH,+CAA+C;YAC/C,kBAAG,CAAC,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAA;YAEnC,OAAO,CAAC,KAAK,CAAC,yCAAyC,IAAI,CAAC,MAAM,CAAC,QAAQ,GAAG,CAAC,CAAA;YAC/E,OAAO,CAAC,KAAK,CAAC,4BAA4B,IAAI,CAAC,MAAM,CAAC,SAAS,MAAM,CAAC,CAAA;YACtE,gFAAgF;YAChF,IAAI,CAAC,KAAK,GAAG,MAAM,IAAA,uBAAQ,EAAC,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;YACxE,OAAO,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAA;QACtD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,yBAAc,CACtB,kCAAmC,KAAe,CAAC,OAAO,EAAE,EAC5D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,KAAK,CAAC,iBAAiB;QAC7B,sBAAsB;QACtB,IAAI,IAAI,CAAC,KAAK,EAAE,CAAC;YACf,OAAM;QACR,CAAC;QAED,kDAAkD;QAClD,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACrB,MAAM,IAAI,CAAC,WAAW,CAAA;YACtB,OAAM;QACR,CAAC;QAED,uBAAuB;QACvB,OAAO,CAAC,KAAK,CACX,+FAA+F,CAChG,CAAA;QAED,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;YACnD,8CAA8C;YAC9C,IAAI,CAAC,WAAW,GAAG,IAAI,CAAA;YAEvB,+CAA+C;YAC/C,MAAM,IAAI,yBAAc,CACtB,+CAAgD,KAAe,CAAC,OAAO,wTAAwT,IAAI,CAAC,MAAM,CAAC,QAAQ,gCAAgC,EACnb,KAAc,CACf,CAAA;QACH,CAAC,CAAC,CAAA;QAEF,MAAM,IAAI,CAAC,WAAW,CAAA;IACxB,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,CAAC;YACH,mEAAmE;YACnE,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACtB,MAAM,IAAI,yBAAc,CAAC,0CAA0C,CAAC,CAAA;YACtE,CAAC;YAED,uEAAuE;YACvE,8FAA8F;YAC9F,MAAM,OAAO,GAAG,EAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,IAAI,EAAE,CAAA;YACpD,MAAM,SAAS,GAA
G,IAAI,CAAC,KAGa,CAAA;YACpC,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,IAAI,EAAE,OAAO,CAAC,CAAA;YAE7C,qCAAqC;YACrC,MAAM,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAA;YACzC,OAAO,SAAS,CAAA;QAClB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,yBAAc,EAAE,CAAC;gBACpC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,IAAI,yBAAc,CACtB,iCAAkC,KAAe,CAAC,OAAO,EAAE,EAC3D,KAAc,CACf,CAAA;QACH,CAAC;IACH,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,UAAU,CAAC,KAAe,EAAE,MAAoB;QACpD,0EAA0E;QAC1E,MAAM,IAAI,CAAC,iBAAiB,EAAE,CAAA;QAE9B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAA;QACX,CAAC;QAED,IAAI,CAAC;YACH,MAAM,UAAU,GAAe,EAAE,CAAA;YAEjC,6CAA6C;YAC7C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;gBAC7D,2CAA2C;gBAC3C,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;oBACpB,MAAM,IAAI,yBAAc,CAAC,mCAAmC,CAAC,CAAA;gBAC/D,CAAC;gBAED,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAA;gBACvD,MAAM,eAAe,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;gBAChF,UAAU,CAAC,IAAI,CAAC,GAAG,eAAe,CAAC,CAAA;YACrC,CAAC;YAED,OAAO,UAAU,CAAA;QACnB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,KAAK,YAAY,yBAAc,EAAE,CAAC;gBACpC,MAAM,KAAK,CAAA;YACb,CAAC;YACD,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;YACtE,MAAM,IAAI,yBAAc,CACtB,wCAAwC,OAAO,EAAE,EACjD,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAC3C,CAAA;QACH,CAAC;IACH,CAAC;CACF;AA/JD,4BA+JC"}
@@ -0,0 +1,73 @@
1
+ /**
2
+ * Error codes for type-safe error handling
3
+ */
4
+ export declare const ErrorCodes: {
5
+ readonly RAG_ERROR: "RAG_ERROR";
6
+ readonly VALIDATION_ERROR: "VALIDATION_ERROR";
7
+ readonly DATABASE_ERROR: "DATABASE_ERROR";
8
+ readonly DATABASE_CONNECTION_ERROR: "DATABASE_CONNECTION_ERROR";
9
+ readonly DATABASE_QUERY_ERROR: "DATABASE_QUERY_ERROR";
10
+ readonly PARSER_VALIDATION_ERROR: "PARSER_VALIDATION_ERROR";
11
+ readonly PARSER_FILE_OPERATION_ERROR: "PARSER_FILE_OPERATION_ERROR";
12
+ readonly PARSER_UNSUPPORTED_FORMAT: "PARSER_UNSUPPORTED_FORMAT";
13
+ readonly EMBEDDING_ERROR: "EMBEDDING_ERROR";
14
+ readonly EMBEDDING_MODEL_ERROR: "EMBEDDING_MODEL_ERROR";
15
+ readonly AUTH_REQUIRED: "AUTH_REQUIRED";
16
+ readonly AUTH_INVALID: "AUTH_INVALID";
17
+ };
18
+ export type ErrorCode = (typeof ErrorCodes)[keyof typeof ErrorCodes];
19
+ /**
20
+ * Base error class for RAG operations
21
+ */
22
+ export declare class RAGError extends Error {
23
+ readonly code: ErrorCode;
24
+ readonly statusCode: number;
25
+ readonly details: Record<string, unknown> | undefined;
26
+ constructor(message: string, options?: {
27
+ code?: ErrorCode;
28
+ statusCode?: number;
29
+ details?: Record<string, unknown>;
30
+ cause?: Error;
31
+ });
32
+ toJSON(): Record<string, unknown>;
33
+ }
34
+ /**
35
+ * Validation error for invalid input
36
+ */
37
+ export declare class ValidationError extends RAGError {
38
+ constructor(message: string, details?: Record<string, unknown>, cause?: Error);
39
+ }
40
+ /**
41
+ * Database operation error
42
+ */
43
+ export declare class DatabaseError extends RAGError {
44
+ constructor(message: string, cause?: Error, code?: ErrorCode);
45
+ }
46
+ /**
47
+ * Parser validation error (equivalent to 400)
48
+ */
49
+ export declare class ParserValidationError extends RAGError {
50
+ constructor(message: string, cause?: Error);
51
+ }
52
+ /**
53
+ * Parser file operation error (equivalent to 500)
54
+ */
55
+ export declare class ParserFileOperationError extends RAGError {
56
+ constructor(message: string, cause?: Error);
57
+ }
58
+ /**
59
+ * Embedding generation error
60
+ */
61
+ export declare class EmbeddingError extends RAGError {
62
+ constructor(message: string, cause?: Error);
63
+ }
64
+ /**
65
+ * Get error message with optional stack trace (based on environment)
66
+ */
67
+ export declare function getErrorMessage(error: Error): string;
68
+ /**
69
+ * Check if an error is a RAGError instance
70
+ * @internal - available for internal use but may be useful for consumers
71
+ */
72
+ export declare function isRAGError(error: unknown): error is RAGError;
73
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/errors/index.ts"],"names":[],"mappings":"AAMA;;GAEG;AACH,eAAO,MAAM,UAAU;;;;;;;;;;;;;CAsBb,CAAA;AAEV,MAAM,MAAM,SAAS,GAAG,CAAC,OAAO,UAAU,CAAC,CAAC,MAAM,OAAO,UAAU,CAAC,CAAA;AAMpE;;GAEG;AACH,qBAAa,QAAS,SAAQ,KAAK;IACjC,SAAgB,IAAI,EAAE,SAAS,CAAA;IAC/B,SAAgB,UAAU,EAAE,MAAM,CAAA;IAClC,SAAgB,OAAO,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GAAG,SAAS,CAAA;gBAG1D,OAAO,EAAE,MAAM,EACf,OAAO,GAAE;QACP,IAAI,CAAC,EAAE,SAAS,CAAA;QAChB,UAAU,CAAC,EAAE,MAAM,CAAA;QACnB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;QACjC,KAAK,CAAC,EAAE,KAAK,CAAA;KACT;IASR,MAAM,IAAI,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC;CASlC;AAMD;;GAEG;AACH,qBAAa,eAAgB,SAAQ,QAAQ;gBAC/B,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,KAAK,CAAC,EAAE,KAAK;CAe9E;AAMD;;GAEG;AACH,qBAAa,aAAc,SAAQ,QAAQ;gBAC7B,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK,EAAE,IAAI,GAAE,SAAqC;CAaxF;AAMD;;GAEG;AACH,qBAAa,qBAAsB,SAAQ,QAAQ;gBACrC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK;CAa3C;AAED;;GAEG;AACH,qBAAa,wBAAyB,SAAQ,QAAQ;gBACxC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK;CAa3C;AAMD;;GAEG;AACH,qBAAa,cAAe,SAAQ,QAAQ;gBAC9B,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,EAAE,KAAK;CAa3C;AAMD;;GAEG;AACH,wBAAgB,eAAe,CAAC,KAAK,EAAE,KAAK,GAAG,MAAM,CAKpD;AAED;;;GAGG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,OAAO,GAAG,KAAK,IAAI,QAAQ,CAE5D"}
@@ -0,0 +1,170 @@
1
+ "use strict";
2
+ // Centralized error classes for RAG operations
3
+ Object.defineProperty(exports, "__esModule", { value: true });
4
+ exports.EmbeddingError = exports.ParserFileOperationError = exports.ParserValidationError = exports.DatabaseError = exports.ValidationError = exports.RAGError = exports.ErrorCodes = void 0;
5
+ exports.getErrorMessage = getErrorMessage;
6
+ exports.isRAGError = isRAGError;
7
+ // ============================================
8
+ // Error Codes
9
+ // ============================================
10
+ /**
11
+ * Error codes for type-safe error handling
12
+ */
13
+ exports.ErrorCodes = {
14
+ // General
15
+ RAG_ERROR: 'RAG_ERROR',
16
+ VALIDATION_ERROR: 'VALIDATION_ERROR',
17
+ // Database
18
+ DATABASE_ERROR: 'DATABASE_ERROR',
19
+ DATABASE_CONNECTION_ERROR: 'DATABASE_CONNECTION_ERROR',
20
+ DATABASE_QUERY_ERROR: 'DATABASE_QUERY_ERROR',
21
+ // Parser
22
+ PARSER_VALIDATION_ERROR: 'PARSER_VALIDATION_ERROR',
23
+ PARSER_FILE_OPERATION_ERROR: 'PARSER_FILE_OPERATION_ERROR',
24
+ PARSER_UNSUPPORTED_FORMAT: 'PARSER_UNSUPPORTED_FORMAT',
25
+ // Embedding
26
+ EMBEDDING_ERROR: 'EMBEDDING_ERROR',
27
+ EMBEDDING_MODEL_ERROR: 'EMBEDDING_MODEL_ERROR',
28
+ // Auth
29
+ AUTH_REQUIRED: 'AUTH_REQUIRED',
30
+ AUTH_INVALID: 'AUTH_INVALID',
31
+ };
32
+ // ============================================
33
+ // Base Error Class
34
+ // ============================================
35
/**
 * Base error class for RAG operations
 *
 * Carries a machine-readable `code`, an HTTP-style `statusCode` and optional
 * structured `details` alongside the standard Error fields.
 */
class RAGError extends Error {
    /**
     * @param message - Human-readable description
     * @param options - Optional `code`, `statusCode`, `details` and `cause`;
     *   a falsy `code` falls back to RAG_ERROR and a falsy `statusCode` to 500
     */
    constructor(message, options = {}) {
        const { cause, code, statusCode, details } = options;
        // Only hand an options bag to Error when there is a cause to record.
        super(message, cause ? { cause } : undefined);
        this.name = 'RAGError';
        this.code = code || exports.ErrorCodes.RAG_ERROR;
        this.statusCode = statusCode || 500;
        this.details = details;
    }
    /**
     * Serializable snapshot of the error.
     *
     * @returns Object with `name`, `code`, `message`, `statusCode`, `details`
     */
    toJSON() {
        const { name, code, message, statusCode, details } = this;
        return { name, code, message, statusCode, details };
    }
}
56
+ exports.RAGError = RAGError;
57
+ // ============================================
58
+ // Validation Errors
59
+ // ============================================
60
/**
 * Validation error for invalid input
 *
 * Always uses code VALIDATION_ERROR and status 400.
 */
class ValidationError extends RAGError {
    /**
     * @param message - Human-readable description
     * @param details - Optional structured details; omitted from the options when undefined
     * @param cause - Optional underlying error; omitted from the options when undefined
     */
    constructor(message, details, cause) {
        super(message, {
            code: exports.ErrorCodes.VALIDATION_ERROR,
            statusCode: 400,
            ...(details === undefined ? {} : { details }),
            ...(cause === undefined ? {} : { cause }),
        });
        this.name = 'ValidationError';
    }
}
77
+ exports.ValidationError = ValidationError;
78
+ // ============================================
79
+ // Database Errors
80
+ // ============================================
81
/**
 * Database operation error
 *
 * Always uses status 500; the code defaults to DATABASE_ERROR.
 */
class DatabaseError extends RAGError {
    /**
     * @param message - Human-readable description
     * @param cause - Optional underlying error; omitted from the options when undefined
     * @param code - Error code to report (defaults to DATABASE_ERROR)
     */
    constructor(message, cause, code = exports.ErrorCodes.DATABASE_ERROR) {
        super(message, {
            code,
            statusCode: 500,
            ...(cause === undefined ? {} : { cause }),
        });
        this.name = 'DatabaseError';
    }
}
96
+ exports.DatabaseError = DatabaseError;
97
+ // ============================================
98
+ // Parser Errors
99
+ // ============================================
100
/**
 * Parser validation error (equivalent to 400)
 *
 * Always uses code PARSER_VALIDATION_ERROR.
 */
class ParserValidationError extends RAGError {
    /**
     * @param message - Human-readable description
     * @param cause - Optional underlying error; omitted from the options when undefined
     */
    constructor(message, cause) {
        super(message, {
            code: exports.ErrorCodes.PARSER_VALIDATION_ERROR,
            statusCode: 400,
            ...(cause === undefined ? {} : { cause }),
        });
        this.name = 'ParserValidationError';
    }
}
115
+ exports.ParserValidationError = ParserValidationError;
116
/**
 * Parser file operation error (equivalent to 500)
 *
 * Always uses code PARSER_FILE_OPERATION_ERROR.
 */
class ParserFileOperationError extends RAGError {
    /**
     * @param message - Human-readable description
     * @param cause - Optional underlying error; omitted from the options when undefined
     */
    constructor(message, cause) {
        super(message, {
            code: exports.ErrorCodes.PARSER_FILE_OPERATION_ERROR,
            statusCode: 500,
            ...(cause === undefined ? {} : { cause }),
        });
        this.name = 'ParserFileOperationError';
    }
}
131
+ exports.ParserFileOperationError = ParserFileOperationError;
132
+ // ============================================
133
+ // Embedding Errors
134
+ // ============================================
135
/**
 * Embedding generation error
 *
 * Always uses code EMBEDDING_ERROR and status 500.
 */
class EmbeddingError extends RAGError {
    /**
     * @param message - Human-readable description
     * @param cause - Optional underlying error; omitted from the options when undefined
     */
    constructor(message, cause) {
        super(message, {
            code: exports.ErrorCodes.EMBEDDING_ERROR,
            statusCode: 500,
            ...(cause === undefined ? {} : { cause }),
        });
        this.name = 'EmbeddingError';
    }
}
150
+ exports.EmbeddingError = EmbeddingError;
151
+ // ============================================
152
+ // Utilities
153
+ // ============================================
154
/**
 * Get error message with optional stack trace (based on environment)
 *
 * When NODE_ENV is exactly "production" only the message is returned;
 * otherwise the stack is preferred, falling back to the message when the
 * stack is empty or missing.
 *
 * @param error - Error to describe
 * @returns Message or stack trace text
 */
function getErrorMessage(error) {
    const inProduction = process.env['NODE_ENV'] === 'production';
    return inProduction ? error.message : error.stack || error.message;
}
163
/**
 * Type guard: report whether a caught value is a RAGError (or a subclass).
 * @internal - available for internal use but may be useful for consumers
 *
 * @param error - Any value, typically from a `catch` clause
 * @returns true when `error instanceof RAGError`
 */
function isRAGError(error) {
    const isInstance = error instanceof RAGError;
    return isInstance;
}
170
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/errors/index.ts"],"names":[],"mappings":";AAAA,+CAA+C;;;AAkM/C,0CAKC;AAMD,gCAEC;AA7MD,+CAA+C;AAC/C,cAAc;AACd,+CAA+C;AAE/C;;GAEG;AACU,QAAA,UAAU,GAAG;IACxB,UAAU;IACV,SAAS,EAAE,WAAW;IACtB,gBAAgB,EAAE,kBAAkB;IAEpC,WAAW;IACX,cAAc,EAAE,gBAAgB;IAChC,yBAAyB,EAAE,2BAA2B;IACtD,oBAAoB,EAAE,sBAAsB;IAE5C,SAAS;IACT,uBAAuB,EAAE,yBAAyB;IAClD,2BAA2B,EAAE,6BAA6B;IAC1D,yBAAyB,EAAE,2BAA2B;IAEtD,YAAY;IACZ,eAAe,EAAE,iBAAiB;IAClC,qBAAqB,EAAE,uBAAuB;IAE9C,OAAO;IACP,aAAa,EAAE,eAAe;IAC9B,YAAY,EAAE,cAAc;CACpB,CAAA;AAIV,+CAA+C;AAC/C,mBAAmB;AACnB,+CAA+C;AAE/C;;GAEG;AACH,MAAa,QAAS,SAAQ,KAAK;IAKjC,YACE,OAAe,EACf,UAKI,EAAE;QAEN,KAAK,CAAC,OAAO,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAA;QACpE,IAAI,CAAC,IAAI,GAAG,UAAU,CAAA;QACtB,IAAI,CAAC,IAAI,GAAG,OAAO,CAAC,IAAI,IAAI,kBAAU,CAAC,SAAS,CAAA;QAChD,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,GAAG,CAAA;QAC3C,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,CAAA;IAChC,CAAC;IAED,MAAM;QACJ,OAAO;YACL,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,UAAU,EAAE,IAAI,CAAC,UAAU;YAC3B,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAA;IACH,CAAC;CACF;AA9BD,4BA8BC;AAED,+CAA+C;AAC/C,oBAAoB;AACpB,+CAA+C;AAE/C;;GAEG;AACH,MAAa,eAAgB,SAAQ,QAAQ;IAC3C,YAAY,OAAe,EAAE,OAAiC,EAAE,KAAa;QAC3E,MAAM,IAAI,GAKN;YACF,IAAI,EAAE,kBAAU,CAAC,gBAAgB;YACjC,UAAU,EAAE,GAAG;SAChB,CAAA;QACD,IAAI,OAAO,KAAK,SAAS;YAAE,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACjD,IAAI,KAAK,KAAK,SAAS;YAAE,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QAC3C,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;QACpB,IAAI,CAAC,IAAI,GAAG,iBAAiB,CAAA;IAC/B,CAAC;CACF;AAhBD,0CAgBC;AAED,+CAA+C;AAC/C,kBAAkB;AAClB,+CAA+C;AAE/C;;GAEG;AACH,MAAa,aAAc,SAAQ,QAAQ;IACzC,YAAY,OAAe,EAAE,KAAa,EAAE,OAAkB,kBAAU,CAAC,cAAc;QACrF,MAAM,IAAI,GAIN;YACF,IAAI;YACJ,UAAU,EAAE,GAAG;SAChB,CAAA;QACD,IAAI,KAAK,KAAK,SAAS;YAAE,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QAC3C,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;QACpB,IAAI,CAAC,IAAI,GAAG,eAAe,CAAA;IAC7B,CAAC;CACF;AAdD,sCAcC;A
AED,+CAA+C;AAC/C,gBAAgB;AAChB,+CAA+C;AAE/C;;GAEG;AACH,MAAa,qBAAsB,SAAQ,QAAQ;IACjD,YAAY,OAAe,EAAE,KAAa;QACxC,MAAM,IAAI,GAIN;YACF,IAAI,EAAE,kBAAU,CAAC,uBAAuB;YACxC,UAAU,EAAE,GAAG;SAChB,CAAA;QACD,IAAI,KAAK,KAAK,SAAS;YAAE,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QAC3C,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;QACpB,IAAI,CAAC,IAAI,GAAG,uBAAuB,CAAA;IACrC,CAAC;CACF;AAdD,sDAcC;AAED;;GAEG;AACH,MAAa,wBAAyB,SAAQ,QAAQ;IACpD,YAAY,OAAe,EAAE,KAAa;QACxC,MAAM,IAAI,GAIN;YACF,IAAI,EAAE,kBAAU,CAAC,2BAA2B;YAC5C,UAAU,EAAE,GAAG;SAChB,CAAA;QACD,IAAI,KAAK,KAAK,SAAS;YAAE,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QAC3C,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;QACpB,IAAI,CAAC,IAAI,GAAG,0BAA0B,CAAA;IACxC,CAAC;CACF;AAdD,4DAcC;AAED,+CAA+C;AAC/C,mBAAmB;AACnB,+CAA+C;AAE/C;;GAEG;AACH,MAAa,cAAe,SAAQ,QAAQ;IAC1C,YAAY,OAAe,EAAE,KAAa;QACxC,MAAM,IAAI,GAIN;YACF,IAAI,EAAE,kBAAU,CAAC,eAAe;YAChC,UAAU,EAAE,GAAG;SAChB,CAAA;QACD,IAAI,KAAK,KAAK,SAAS;YAAE,IAAI,CAAC,KAAK,GAAG,KAAK,CAAA;QAC3C,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAA;QACpB,IAAI,CAAC,IAAI,GAAG,gBAAgB,CAAA;IAC9B,CAAC;CACF;AAdD,wCAcC;AAED,+CAA+C;AAC/C,YAAY;AACZ,+CAA+C;AAE/C;;GAEG;AACH,SAAgB,eAAe,CAAC,KAAY;IAC1C,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,KAAK,YAAY,EAAE,CAAC;QAC7C,OAAO,KAAK,CAAC,OAAO,CAAA;IACtB,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAA;AACrC,CAAC;AAED;;;GAGG;AACH,SAAgB,UAAU,CAAC,KAAc;IACvC,OAAO,KAAK,YAAY,QAAQ,CAAA;AAClC,CAAC"}
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":""}
package/dist/index.js ADDED
@@ -0,0 +1,91 @@
1
+ #!/usr/bin/env node
2
+ "use strict";
3
+ // Entry point for RAG MCP Server
4
// Compiler-emitted module interop helpers. Each one reuses a definition
// already present on `this` when there is one, otherwise defines its own.

// Expose source[key] on target under `alias` (defaults to `key`). Uses a
// getter so later changes on the source stay visible, unless the source
// property is already an accessor on an ES module or is fully locked down.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter = !descriptor || ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        target[alias] = source[key];
    });
// Attach `value` as the enumerable "default" export of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });
// Wrap a CommonJS module in a namespace-like object: every own key except
// "default" is re-bound, and the module itself becomes `default`. Modules
// flagged with __esModule are returned untouched.
var __importStar = (this && this.__importStar) || (function () {
    var listOwnKeys = Object.getOwnPropertyNames || function (obj) {
        var names = [];
        for (var name in obj) {
            if (Object.prototype.hasOwnProperty.call(obj, name)) names[names.length] = name;
        }
        return names;
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] !== "default") __createBinding(namespace, mod, keys[i]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
37
+ Object.defineProperty(exports, "__esModule", { value: true });
38
+ const install_skills_js_1 = require("./bin/install-skills.js");
39
+ const index_js_1 = require("./server/index.js");
40
+ const config_js_1 = require("./utils/config.js");
41
+ const process_handlers_js_1 = require("./utils/process-handlers.js");
42
+ // ============================================
43
+ // Subcommand Routing
44
+ // ============================================
45
const args = process.argv.slice(2);
const [subcommand, action] = args;
// Dispatch on the first CLI argument; anything unrecognized starts the MCP server.
switch (subcommand) {
    case 'skills': {
        // npx rag-vault skills install [options]
        if (action === 'install') {
            (0, install_skills_js_1.run)(args.slice(2));
        }
        else {
            console.error('Unknown skills subcommand. Usage: npx rag-vault skills install [options]');
            console.error('Run "npx rag-vault skills install --help" for more information.');
            process.exit(1);
        }
        break;
    }
    case 'web': {
        // "web" subcommand - launches HTTP server with web UI.
        // The module is loaded lazily so it is only required when requested.
        Promise.resolve()
            .then(() => __importStar(require('./web/index.js')))
            .catch((error) => {
                console.error('Failed to start web server:', error);
                process.exit(1);
            });
        break;
    }
    default: {
        // ============================================
        // MCP Server (default behavior)
        // ============================================
        (0, process_handlers_js_1.setupProcessHandlers)();
        main();
    }
}
72
/**
 * Entry point - Start RAG MCP Server
 *
 * Builds the configuration, constructs the server, then initializes and runs
 * it. Any failure is logged and the process exits with status 1, so this
 * function never rejects.
 */
async function main() {
    try {
        const { buildRAGConfig } = config_js_1;
        const ragConfig = buildRAGConfig();
        console.error('Starting RAG MCP Server...');
        console.error('Configuration:', ragConfig);
        // Start RAGServer
        const ragServer = new index_js_1.RAGServer(ragConfig);
        await ragServer.initialize();
        await ragServer.run();
        console.error('RAG MCP Server started successfully');
    }
    catch (startupError) {
        console.error('Failed to start RAG MCP Server:', startupError);
        process.exit(1);
    }
}
91
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";;AACA,iCAAiC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEjC,+DAAiE;AACjE,gDAA6C;AAC7C,iDAAkD;AAClD,qEAAkE;AAElE,+CAA+C;AAC/C,qBAAqB;AACrB,+CAA+C;AAE/C,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;AAElC,6BAA6B;AAC7B,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,QAAQ,EAAE,CAAC;IACzB,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;QAC1B,yCAAyC;QACzC,IAAA,uBAAgB,EAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAA;IACjC,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,KAAK,CAAC,0EAA0E,CAAC,CAAA;QACzF,OAAO,CAAC,KAAK,CAAC,iEAAiE,CAAC,CAAA;QAChF,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;AACH,CAAC;KAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,KAAK,EAAE,CAAC;IAC7B,6DAA6D;IAC7D,kDAAO,gBAAgB,IAAE,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;QACvC,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,KAAK,CAAC,CAAA;QACnD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC,CAAC,CAAA;AACJ,CAAC;KAAM,CAAC;IACN,+CAA+C;IAC/C,gCAAgC;IAChC,+CAA+C;IAC/C,IAAA,0CAAoB,GAAE,CAAA;IACtB,IAAI,EAAE,CAAA;AACR,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,IAAI;IACjB,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAA,0BAAc,GAAE,CAAA;QAE/B,OAAO,CAAC,KAAK,CAAC,4BAA4B,CAAC,CAAA;QAC3C,OAAO,CAAC,KAAK,CAAC,gBAAgB,EAAE,MAAM,CAAC,CAAA;QAEvC,kBAAkB;QAClB,MAAM,MAAM,GAAG,IAAI,oBAAS,CAAC,MAAM,CAAC,CAAA;QACpC,MAAM,MAAM,CAAC,UAAU,EAAE,CAAA;QACzB,MAAM,MAAM,CAAC,GAAG,EAAE,CAAA;QAElB,OAAO,CAAC,KAAK,CAAC,qCAAqC,CAAC,CAAA;IACtD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,iCAAiC,EAAE,KAAK,CAAC,CAAA;QACvD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACjB,CAAC;AACH,CAAC"}
@@ -0,0 +1,14 @@
1
+ /**
2
+ * Parse HTML content and extract main content as Markdown
3
+ *
4
+ * Flow:
5
+ * 1. HTML string → JSDOM (DOM creation)
6
+ * 2. JSDOM → Readability (main content extraction, noise removal)
7
+ * 3. Readability result → Turndown (Markdown conversion)
8
+ *
9
+ * @param html - Raw HTML string
10
+ * @param url - Source URL (used for resolving relative links)
11
+ * @returns Markdown string of extracted content
12
+ */
13
+ export declare function parseHtml(html: string, url: string): Promise<string>;
14
+ //# sourceMappingURL=html-parser.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-parser.d.ts","sourceRoot":"","sources":["../../src/parser/html-parser.ts"],"names":[],"mappings":"AAsDA;;;;;;;;;;;GAWG;AACH,wBAAsB,SAAS,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAoD1E"}
@@ -0,0 +1,99 @@
1
+ "use strict";
2
+ // HTML Parser using Readability and Turndown
3
+ // Extracts main content from HTML and converts to Markdown
4
+ var __importDefault = (this && this.__importDefault) || function (mod) {
5
+ return (mod && mod.__esModule) ? mod : { "default": mod };
6
+ };
7
+ Object.defineProperty(exports, "__esModule", { value: true });
8
+ exports.parseHtml = parseHtml;
9
+ const readability_1 = require("@mozilla/readability");
10
+ const jsdom_1 = require("jsdom");
11
+ const turndown_1 = __importDefault(require("turndown"));
12
+ // ============================================
13
+ // Turndown Service Configuration
14
+ // ============================================
15
/**
 * Derive the info-string language for a fenced code block from the class
 * attribute of a <code> element.
 *
 * A `language-xxx` token wins wherever it appears in the class list
 * (e.g. "hljs language-js" -> "js"). When there is no such token, a lone
 * class name is used as-is; an ambiguous multi-class list yields ''.
 *
 * @param className - Value of the element's className (may be undefined)
 * @returns Language identifier, or '' when none can be determined
 */
function detectCodeLanguage(className) {
    if (typeof className !== 'string') {
        return '';
    }
    const prefix = 'language-';
    const classNames = className.split(/\s+/).filter(Boolean);
    const prefixed = classNames.find((name) => name.startsWith(prefix));
    if (prefixed !== undefined) {
        return prefixed.slice(prefix.length);
    }
    return classNames.length === 1 ? classNames[0] : '';
}
/**
 * Build a backtick fence that the fenced code cannot terminate early.
 *
 * The fence is one backtick longer than the longest backtick run found in
 * the code, and never shorter than three.
 *
 * @param code - Code that will be placed inside the fence
 * @returns Fence string made only of backticks
 */
function buildCodeFence(code) {
    const backtickRuns = code.match(/`+/g) ?? [];
    const longestRun = backtickRuns.reduce((longest, run) => Math.max(longest, run.length), 0);
    return '`'.repeat(Math.max(3, longestRun + 1));
}
/**
 * Create and configure Turndown service for HTML to Markdown conversion
 *
 * @returns Turndown instance with the options below and a custom
 *          `codeBlocks` rule for <pre> elements
 */
function createTurndownService() {
    const turndownService = new turndown_1.default({
        headingStyle: 'atx', // Use # style headings
        codeBlockStyle: 'fenced', // Use ``` for code blocks
        bulletListMarker: '-', // Use - for bullet lists
        emDelimiter: '_', // Use _ for emphasis
        strongDelimiter: '**', // Use ** for bold
    });
    // Keep code blocks intact: emit the raw text of <pre> (or its inner
    // <code>) inside a fence instead of the converted child content.
    turndownService.addRule('codeBlocks', {
        filter: ['pre'],
        replacement: (_content, node) => {
            const element = node;
            const codeElement = element.querySelector('code');
            const rawCode = codeElement ? codeElement.textContent : element.textContent;
            const code = rawCode?.trim() || '';
            const language = detectCodeLanguage(codeElement?.className);
            const fence = buildCodeFence(code);
            return `\n${fence}${language}\n${code}\n${fence}\n`;
        },
    });
    return turndownService;
}
39
+ // ============================================
40
+ // HTML Parser
41
+ // ============================================
42
/**
 * Parse HTML content and extract main content as Markdown
 *
 * Flow:
 * 1. HTML string → JSDOM (DOM creation)
 * 2. JSDOM → Readability (main content extraction, noise removal)
 * 3. Readability result → Turndown (Markdown conversion)
 *
 * When Readability yields no content, the raw <body> HTML is converted
 * instead. Errors are logged with console.error and never thrown.
 *
 * @param html - Raw HTML string
 * @param url - Source URL (used for resolving relative links)
 * @returns Markdown string of extracted content (prefixed with a
 *          `# title` heading when Readability reports a title); an empty
 *          string when the input is blank, the body is empty, or parsing fails
 */
async function parseHtml(html, url) {
    // Handle empty or whitespace-only HTML
    if (!html || html.trim().length === 0) {
        return '';
    }
    let dom;
    try {
        // Create DOM from HTML string
        dom = new jsdom_1.JSDOM(html, {
            url,
            // NOTE(review): kept for identical behaviour; it is not evident from
            // this file that Readability needs script support - confirm and drop.
            runScripts: 'outside-only',
        });
        const document = dom.window.document;
        // Use Readability to extract main content
        const reader = new readability_1.Readability(document, {
            keepClasses: false,
            debug: false,
        });
        const article = reader.parse();
        // If Readability couldn't extract content, fall back to the body HTML
        if (!article || !article.content) {
            const bodyContent = document.body?.innerHTML || '';
            if (!bodyContent.trim()) {
                return '';
            }
            // Convert raw body HTML to Markdown
            return createTurndownService().turndown(bodyContent).trim();
        }
        // Convert extracted HTML content to Markdown
        const markdown = createTurndownService().turndown(article.content);
        // Add title if available
        if (article.title) {
            return `# ${article.title}\n\n${markdown}`.trim();
        }
        return markdown.trim();
    }
    catch (error) {
        // Log error but don't throw - return empty string for graceful degradation
        console.error('Failed to parse HTML:', error);
        return '';
    }
    finally {
        // Release the jsdom window on every exit path; all values returned
        // above are plain strings, so nothing depends on the DOM afterwards.
        dom?.window.close();
    }
}
99
+ //# sourceMappingURL=html-parser.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"html-parser.js","sourceRoot":"","sources":["../../src/parser/html-parser.ts"],"names":[],"mappings":";AAAA,6CAA6C;AAC7C,2DAA2D;;;;;AAiE3D,8BAoDC;AAnHD,sDAAkD;AAClD,iCAA6B;AAC7B,wDAAsC;AActC,+CAA+C;AAC/C,iCAAiC;AACjC,+CAA+C;AAE/C;;GAEG;AACH,SAAS,qBAAqB;IAC5B,MAAM,eAAe,GAAG,IAAI,kBAAe,CAAC;QAC1C,YAAY,EAAE,KAAK,EAAE,uBAAuB;QAC5C,cAAc,EAAE,QAAQ,EAAE,0BAA0B;QACpD,gBAAgB,EAAE,GAAG,EAAE,yBAAyB;QAChD,WAAW,EAAE,GAAG,EAAE,qBAAqB;QACvC,eAAe,EAAE,IAAI,EAAE,kBAAkB;KAC1C,CAAC,CAAA;IAEF,0BAA0B;IAC1B,eAAe,CAAC,OAAO,CAAC,YAAY,EAAE;QACpC,MAAM,EAAE,CAAC,KAAK,CAAC;QACf,WAAW,EAAE,CAAC,QAAQ,EAAE,IAAI,EAAE,EAAE;YAC9B,MAAM,OAAO,GAAG,IAAe,CAAA;YAC/B,MAAM,WAAW,GAAG,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,CAAA;YACjD,MAAM,IAAI,GAAG,WAAW,CAAC,CAAC,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC,WAAW,CAAA;YACxE,MAAM,QAAQ,GAAG,WAAW,EAAE,SAAS,EAAE,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC,IAAI,EAAE,CAAA;YACvE,OAAO,WAAW,QAAQ,KAAK,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,CAAA;QAC/D,CAAC;KACF,CAAC,CAAA;IAEF,OAAO,eAAe,CAAA;AACxB,CAAC;AAED,+CAA+C;AAC/C,cAAc;AACd,+CAA+C;AAE/C;;;;;;;;;;;GAWG;AACI,KAAK,UAAU,SAAS,CAAC,IAAY,EAAE,GAAW;IACvD,uCAAuC;IACvC,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtC,OAAO,EAAE,CAAA;IACX,CAAC;IAED,IAAI,CAAC;QACH,8BAA8B;QAC9B,MAAM,GAAG,GAAG,IAAI,aAAK,CAAC,IAAI,EAAE;YAC1B,GAAG;YACH,yCAAyC;YACzC,UAAU,EAAE,cAAc;SAC3B,CAAC,CAAA;QAEF,MAAM,QAAQ,GAAG,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAA;QAEpC,0CAA0C;QAC1C,MAAM,MAAM,GAAG,IAAI,yBAAW,CAAC,QAAQ,EAAE;YACvC,WAAW,EAAE,KAAK;YAClB,KAAK,EAAE,KAAK;SACb,CAAC,CAAA;QAEF,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAA8B,CAAA;QAE1D,kEAAkE;QAClE,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YACjC,mCAAmC;YACnC,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,EAAE,SAAS,IAAI,EAAE,CAAA;YAClD,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC;gBACxB,OAAO,EAAE,CAAA;YACX,CAAC;YAED,oCAAoC;YACpC,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA;YAC/C,OAAO,eAAe,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAA;QACrD,CAAC;QAED,6CAA6C;QAC7C,MAAM,eAAe,GAAG,qBAAqB,EAAE,CAAA;QAC/C,MAAM,QAAQ,GAAG,eAAe,CAAC,QAAQ,CAA
C,OAAO,CAAC,OAAO,CAAC,CAAA;QAE1D,yBAAyB;QACzB,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YAClB,OAAO,KAAK,OAAO,CAAC,KAAK,OAAO,QAAQ,EAAE,CAAC,IAAI,EAAE,CAAA;QACnD,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAA;IACxB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,2EAA2E;QAC3E,OAAO,CAAC,KAAK,CAAC,uBAAuB,EAAE,KAAK,CAAC,CAAA;QAC7C,OAAO,EAAE,CAAA;IACX,CAAC;AACH,CAAC"}