@gmickel/gno 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +256 -0
  2. package/assets/skill/SKILL.md +112 -0
  3. package/assets/skill/cli-reference.md +327 -0
  4. package/assets/skill/examples.md +234 -0
  5. package/assets/skill/mcp-reference.md +159 -0
  6. package/package.json +90 -0
  7. package/src/app/constants.ts +313 -0
  8. package/src/cli/colors.ts +65 -0
  9. package/src/cli/commands/ask.ts +545 -0
  10. package/src/cli/commands/cleanup.ts +105 -0
  11. package/src/cli/commands/collection/add.ts +120 -0
  12. package/src/cli/commands/collection/index.ts +10 -0
  13. package/src/cli/commands/collection/list.ts +108 -0
  14. package/src/cli/commands/collection/remove.ts +64 -0
  15. package/src/cli/commands/collection/rename.ts +95 -0
  16. package/src/cli/commands/context/add.ts +67 -0
  17. package/src/cli/commands/context/check.ts +153 -0
  18. package/src/cli/commands/context/index.ts +10 -0
  19. package/src/cli/commands/context/list.ts +109 -0
  20. package/src/cli/commands/context/rm.ts +52 -0
  21. package/src/cli/commands/doctor.ts +393 -0
  22. package/src/cli/commands/embed.ts +462 -0
  23. package/src/cli/commands/get.ts +356 -0
  24. package/src/cli/commands/index-cmd.ts +119 -0
  25. package/src/cli/commands/index.ts +102 -0
  26. package/src/cli/commands/init.ts +328 -0
  27. package/src/cli/commands/ls.ts +217 -0
  28. package/src/cli/commands/mcp/config.ts +300 -0
  29. package/src/cli/commands/mcp/index.ts +24 -0
  30. package/src/cli/commands/mcp/install.ts +203 -0
  31. package/src/cli/commands/mcp/paths.ts +470 -0
  32. package/src/cli/commands/mcp/status.ts +222 -0
  33. package/src/cli/commands/mcp/uninstall.ts +158 -0
  34. package/src/cli/commands/mcp.ts +20 -0
  35. package/src/cli/commands/models/clear.ts +103 -0
  36. package/src/cli/commands/models/index.ts +32 -0
  37. package/src/cli/commands/models/list.ts +214 -0
  38. package/src/cli/commands/models/path.ts +51 -0
  39. package/src/cli/commands/models/pull.ts +199 -0
  40. package/src/cli/commands/models/use.ts +85 -0
  41. package/src/cli/commands/multi-get.ts +400 -0
  42. package/src/cli/commands/query.ts +220 -0
  43. package/src/cli/commands/ref-parser.ts +108 -0
  44. package/src/cli/commands/reset.ts +191 -0
  45. package/src/cli/commands/search.ts +136 -0
  46. package/src/cli/commands/shared.ts +156 -0
  47. package/src/cli/commands/skill/index.ts +19 -0
  48. package/src/cli/commands/skill/install.ts +197 -0
  49. package/src/cli/commands/skill/paths-cmd.ts +81 -0
  50. package/src/cli/commands/skill/paths.ts +191 -0
  51. package/src/cli/commands/skill/show.ts +73 -0
  52. package/src/cli/commands/skill/uninstall.ts +141 -0
  53. package/src/cli/commands/status.ts +205 -0
  54. package/src/cli/commands/update.ts +68 -0
  55. package/src/cli/commands/vsearch.ts +188 -0
  56. package/src/cli/context.ts +64 -0
  57. package/src/cli/errors.ts +64 -0
  58. package/src/cli/format/search-results.ts +211 -0
  59. package/src/cli/options.ts +183 -0
  60. package/src/cli/program.ts +1330 -0
  61. package/src/cli/run.ts +213 -0
  62. package/src/cli/ui.ts +92 -0
  63. package/src/config/defaults.ts +20 -0
  64. package/src/config/index.ts +55 -0
  65. package/src/config/loader.ts +161 -0
  66. package/src/config/paths.ts +87 -0
  67. package/src/config/saver.ts +153 -0
  68. package/src/config/types.ts +280 -0
  69. package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
  70. package/src/converters/adapters/officeparser/adapter.ts +126 -0
  71. package/src/converters/canonicalize.ts +89 -0
  72. package/src/converters/errors.ts +218 -0
  73. package/src/converters/index.ts +51 -0
  74. package/src/converters/mime.ts +163 -0
  75. package/src/converters/native/markdown.ts +115 -0
  76. package/src/converters/native/plaintext.ts +56 -0
  77. package/src/converters/path.ts +48 -0
  78. package/src/converters/pipeline.ts +159 -0
  79. package/src/converters/registry.ts +74 -0
  80. package/src/converters/types.ts +123 -0
  81. package/src/converters/versions.ts +24 -0
  82. package/src/index.ts +27 -0
  83. package/src/ingestion/chunker.ts +238 -0
  84. package/src/ingestion/index.ts +32 -0
  85. package/src/ingestion/language.ts +276 -0
  86. package/src/ingestion/sync.ts +671 -0
  87. package/src/ingestion/types.ts +219 -0
  88. package/src/ingestion/walker.ts +235 -0
  89. package/src/llm/cache.ts +467 -0
  90. package/src/llm/errors.ts +191 -0
  91. package/src/llm/index.ts +58 -0
  92. package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
  93. package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
  94. package/src/llm/nodeLlamaCpp/generation.ts +88 -0
  95. package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
  96. package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
  97. package/src/llm/registry.ts +86 -0
  98. package/src/llm/types.ts +129 -0
  99. package/src/mcp/resources/index.ts +151 -0
  100. package/src/mcp/server.ts +229 -0
  101. package/src/mcp/tools/get.ts +220 -0
  102. package/src/mcp/tools/index.ts +160 -0
  103. package/src/mcp/tools/multi-get.ts +263 -0
  104. package/src/mcp/tools/query.ts +226 -0
  105. package/src/mcp/tools/search.ts +119 -0
  106. package/src/mcp/tools/status.ts +81 -0
  107. package/src/mcp/tools/vsearch.ts +198 -0
  108. package/src/pipeline/chunk-lookup.ts +44 -0
  109. package/src/pipeline/expansion.ts +256 -0
  110. package/src/pipeline/explain.ts +115 -0
  111. package/src/pipeline/fusion.ts +185 -0
  112. package/src/pipeline/hybrid.ts +535 -0
  113. package/src/pipeline/index.ts +64 -0
  114. package/src/pipeline/query-language.ts +118 -0
  115. package/src/pipeline/rerank.ts +223 -0
  116. package/src/pipeline/search.ts +261 -0
  117. package/src/pipeline/types.ts +328 -0
  118. package/src/pipeline/vsearch.ts +348 -0
  119. package/src/store/index.ts +41 -0
  120. package/src/store/migrations/001-initial.ts +196 -0
  121. package/src/store/migrations/index.ts +20 -0
  122. package/src/store/migrations/runner.ts +187 -0
  123. package/src/store/sqlite/adapter.ts +1242 -0
  124. package/src/store/sqlite/index.ts +7 -0
  125. package/src/store/sqlite/setup.ts +129 -0
  126. package/src/store/sqlite/types.ts +28 -0
  127. package/src/store/types.ts +506 -0
  128. package/src/store/vector/index.ts +13 -0
  129. package/src/store/vector/sqlite-vec.ts +373 -0
  130. package/src/store/vector/stats.ts +152 -0
  131. package/src/store/vector/types.ts +115 -0
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Ingestion subsystem - public exports.
3
+ *
4
+ * @module src/ingestion
5
+ */
6
+
7
+ // Chunker
8
+ export { defaultChunker, MarkdownChunker } from './chunker';
9
+ // Language detection
10
+ export { defaultLanguageDetector, SimpleLanguageDetector } from './language';
11
+ // Sync service
12
+ export { defaultSyncService, SyncService } from './sync';
13
+ // Types
14
+ export type {
15
+ ChunkerPort,
16
+ ChunkOutput,
17
+ ChunkParams,
18
+ CollectionSyncResult,
19
+ FileSyncResult,
20
+ FileSyncStatus,
21
+ LanguageDetectorPort,
22
+ ProcessDecision,
23
+ SkippedEntry,
24
+ SyncOptions,
25
+ SyncResult,
26
+ WalkConfig,
27
+ WalkEntry,
28
+ WalkerPort,
29
+ } from './types';
30
+ export { collectionToWalkConfig, DEFAULT_CHUNK_PARAMS } from './types';
31
+ // Walker
32
+ export { defaultWalker, FileWalker } from './walker';
@@ -0,0 +1,276 @@
1
+ /**
2
+ * Deterministic language detection for chunks.
3
+ * Uses simple heuristics - no external dependencies.
4
+ *
5
+ * @module src/ingestion/language
6
+ */
7
+
8
+ import type { LanguageDetectorPort } from './types';
9
+
10
+ /** Regex to split on whitespace and punctuation */
11
+ const WORD_SPLIT_REGEX = /[\s\p{P}]+/u;
12
+
13
+ /** Hiragana range */
14
+ const HIRAGANA_MIN = 0x30_40;
15
+ const HIRAGANA_MAX = 0x30_9f;
16
+
17
+ /** Katakana range */
18
+ const KATAKANA_MIN = 0x30_a0;
19
+ const KATAKANA_MAX = 0x30_ff;
20
+
21
+ /** Hangul range */
22
+ const HANGUL_MIN = 0xac_00;
23
+ const HANGUL_MAX = 0xd7_af;
24
+
25
+ /** CJK ranges for quick codepoint checking */
26
+ const CJK_RANGES = [
27
+ [0x4e_00, 0x9f_ff], // CJK Unified Ideographs
28
+ [0x34_00, 0x4d_bf], // CJK Unified Ideographs Extension A
29
+ [0x30_40, 0x30_9f], // Hiragana
30
+ [0x30_a0, 0x30_ff], // Katakana
31
+ [0xac_00, 0xd7_af], // Hangul
32
+ ] as const;
33
+
34
+ /**
35
+ * Character frequency thresholds for CJK detection.
36
+ */
37
+ const CJK_THRESHOLD = 0.1; // 10% CJK chars triggers detection
38
+
39
+ /**
40
+ * Common words for European language detection.
41
+ * These are stop words that appear frequently.
42
+ * Pre-built as Sets for O(1) lookup.
43
+ */
44
+ const LANGUAGE_MARKER_SETS: Record<string, Set<string>> = {
45
+ en: new Set([
46
+ 'the',
47
+ 'and',
48
+ 'is',
49
+ 'are',
50
+ 'was',
51
+ 'were',
52
+ 'be',
53
+ 'have',
54
+ 'has',
55
+ 'this',
56
+ 'that',
57
+ 'with',
58
+ 'for',
59
+ 'not',
60
+ ]),
61
+ de: new Set([
62
+ 'der',
63
+ 'die',
64
+ 'das',
65
+ 'und',
66
+ 'ist',
67
+ 'sind',
68
+ 'ein',
69
+ 'eine',
70
+ 'für',
71
+ 'mit',
72
+ 'auf',
73
+ 'den',
74
+ 'dem',
75
+ 'nicht',
76
+ ]),
77
+ fr: new Set([
78
+ 'le',
79
+ 'la',
80
+ 'les',
81
+ 'et',
82
+ 'est',
83
+ 'sont',
84
+ 'un',
85
+ 'une',
86
+ 'pour',
87
+ 'avec',
88
+ 'sur',
89
+ 'des',
90
+ 'dans',
91
+ 'pas',
92
+ ]),
93
+ it: new Set([
94
+ 'il',
95
+ 'la',
96
+ 'le',
97
+ 'e',
98
+ 'è',
99
+ 'sono',
100
+ 'un',
101
+ 'una',
102
+ 'per',
103
+ 'con',
104
+ 'su',
105
+ 'dei',
106
+ 'nel',
107
+ 'non',
108
+ ]),
109
+ };
110
+
111
+ /**
112
+ * Check if a codepoint is CJK.
113
+ */
114
+ function isCjkCodepoint(cp: number): boolean {
115
+ for (const [min, max] of CJK_RANGES) {
116
+ if (cp >= min && cp <= max) {
117
+ return true;
118
+ }
119
+ }
120
+ return false;
121
+ }
122
+
123
+ /**
124
+ * Check if a codepoint is whitespace.
125
+ */
126
+ function isWhitespace(cp: number): boolean {
127
+ // Common whitespace codepoints
128
+ return (
129
+ cp === 0x20 || // space
130
+ cp === 0x09 || // tab
131
+ cp === 0x0a || // newline
132
+ cp === 0x0d || // carriage return
133
+ cp === 0x0c || // form feed
134
+ cp === 0xa0 || // non-breaking space
135
+ (cp >= 0x20_00 && cp <= 0x20_0a) // various spaces
136
+ );
137
+ }
138
+
139
+ /**
140
+ * Extract words from text for language analysis.
141
+ */
142
+ function extractWords(text: string): string[] {
143
+ return text
144
+ .toLowerCase()
145
+ .split(WORD_SPLIT_REGEX)
146
+ .filter((w) => w.length >= 2 && w.length <= 15);
147
+ }
148
+
149
+ /**
150
+ * Detect if text is primarily CJK (Chinese, Japanese, Korean).
151
+ * Single-pass counting for efficiency.
152
+ */
153
+ // biome-ignore lint/complexity/noExcessiveCognitiveComplexity: Unicode range detection with multiple language heuristics
154
+ function detectCjk(text: string): 'zh' | 'ja' | 'ko' | null {
155
+ let totalChars = 0;
156
+ let cjkCount = 0;
157
+ let hasHiragana = false;
158
+ let hasKatakana = false;
159
+ let hasHangul = false;
160
+
161
+ // Single pass through the string
162
+ for (const char of text) {
163
+ const cp = char.codePointAt(0);
164
+ if (cp === undefined) {
165
+ continue;
166
+ }
167
+
168
+ // Skip whitespace for total count
169
+ if (!isWhitespace(cp)) {
170
+ totalChars += 1;
171
+
172
+ // Check CJK ranges
173
+ if (isCjkCodepoint(cp)) {
174
+ cjkCount += 1;
175
+
176
+ // Also check for script-specific markers
177
+ if (cp >= HIRAGANA_MIN && cp <= HIRAGANA_MAX) {
178
+ hasHiragana = true;
179
+ } else if (cp >= KATAKANA_MIN && cp <= KATAKANA_MAX) {
180
+ hasKatakana = true;
181
+ } else if (cp >= HANGUL_MIN && cp <= HANGUL_MAX) {
182
+ hasHangul = true;
183
+ }
184
+ }
185
+ }
186
+ }
187
+
188
+ if (totalChars === 0) {
189
+ return null;
190
+ }
191
+
192
+ const cjkRatio = cjkCount / totalChars;
193
+
194
+ if (cjkRatio < CJK_THRESHOLD) {
195
+ return null;
196
+ }
197
+
198
+ // Distinguish between CJK languages by script-specific characters
199
+ if (hasHiragana || hasKatakana) {
200
+ return 'ja';
201
+ }
202
+ if (hasHangul) {
203
+ return 'ko';
204
+ }
205
+
206
+ // Default to Chinese for pure Han characters
207
+ return 'zh';
208
+ }
209
+
210
+ /**
211
+ * Detect European language by word frequency.
212
+ * Uses pre-built Sets for O(1) marker lookup.
213
+ */
214
+ function detectEuropean(words: string[]): string | null {
215
+ if (words.length < 10) {
216
+ return null;
217
+ }
218
+
219
+ const scores: Record<string, number> = {};
220
+
221
+ for (const [lang, markerSet] of Object.entries(LANGUAGE_MARKER_SETS)) {
222
+ let matches = 0;
223
+
224
+ for (const word of words) {
225
+ if (markerSet.has(word)) {
226
+ matches += 1;
227
+ }
228
+ }
229
+
230
+ scores[lang] = matches / words.length;
231
+ }
232
+
233
+ // Find language with highest score (must exceed threshold)
234
+ const threshold = 0.02; // 2% of words must be markers
235
+ let bestLang: string | null = null;
236
+ let bestScore = threshold;
237
+
238
+ for (const [lang, score] of Object.entries(scores)) {
239
+ if (score > bestScore) {
240
+ bestScore = score;
241
+ bestLang = lang;
242
+ }
243
+ }
244
+
245
+ return bestLang;
246
+ }
247
+
248
+ /**
249
+ * Simple deterministic language detector.
250
+ * Priority:
251
+ * 1. CJK detection (script-based)
252
+ * 2. European language detection (word frequency)
253
+ * 3. null (undetermined)
254
+ */
255
+ export class SimpleLanguageDetector implements LanguageDetectorPort {
256
+ detect(text: string): string | null {
257
+ if (!text || text.length < 50) {
258
+ return null;
259
+ }
260
+
261
+ // Try CJK first (script-based, more reliable)
262
+ const cjk = detectCjk(text);
263
+ if (cjk) {
264
+ return cjk;
265
+ }
266
+
267
+ // Try European languages
268
+ const words = extractWords(text);
269
+ return detectEuropean(words);
270
+ }
271
+ }
272
+
273
+ /**
274
+ * Default language detector instance.
275
+ */
276
+ export const defaultLanguageDetector = new SimpleLanguageDetector();