@weave_protocol/domere 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/PLANNING.md +231 -0
  2. package/README.md +50 -0
  3. package/dist/anchoring/ethereum.d.ts +135 -0
  4. package/dist/anchoring/ethereum.d.ts.map +1 -0
  5. package/dist/anchoring/ethereum.js +474 -0
  6. package/dist/anchoring/ethereum.js.map +1 -0
  7. package/dist/anchoring/index.d.ts +93 -0
  8. package/dist/anchoring/index.d.ts.map +1 -0
  9. package/dist/anchoring/index.js +184 -0
  10. package/dist/anchoring/index.js.map +1 -0
  11. package/dist/anchoring/merkle.d.ts +91 -0
  12. package/dist/anchoring/merkle.d.ts.map +1 -0
  13. package/dist/anchoring/merkle.js +203 -0
  14. package/dist/anchoring/merkle.js.map +1 -0
  15. package/dist/anchoring/solana.d.ts +85 -0
  16. package/dist/anchoring/solana.d.ts.map +1 -0
  17. package/dist/anchoring/solana.js +301 -0
  18. package/dist/anchoring/solana.js.map +1 -0
  19. package/dist/constants.d.ts +130 -0
  20. package/dist/constants.d.ts.map +1 -0
  21. package/dist/constants.js +536 -0
  22. package/dist/constants.js.map +1 -0
  23. package/dist/index.d.ts +13 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +37 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/language/code-analyzer.d.ts +80 -0
  28. package/dist/language/code-analyzer.d.ts.map +1 -0
  29. package/dist/language/code-analyzer.js +489 -0
  30. package/dist/language/code-analyzer.js.map +1 -0
  31. package/dist/language/detector.d.ts +53 -0
  32. package/dist/language/detector.d.ts.map +1 -0
  33. package/dist/language/detector.js +248 -0
  34. package/dist/language/detector.js.map +1 -0
  35. package/dist/language/index.d.ts +61 -0
  36. package/dist/language/index.d.ts.map +1 -0
  37. package/dist/language/index.js +109 -0
  38. package/dist/language/index.js.map +1 -0
  39. package/dist/language/nl-analyzer.d.ts +59 -0
  40. package/dist/language/nl-analyzer.d.ts.map +1 -0
  41. package/dist/language/nl-analyzer.js +350 -0
  42. package/dist/language/nl-analyzer.js.map +1 -0
  43. package/dist/language/semantic.d.ts +48 -0
  44. package/dist/language/semantic.d.ts.map +1 -0
  45. package/dist/language/semantic.js +329 -0
  46. package/dist/language/semantic.js.map +1 -0
  47. package/dist/storage/index.d.ts +6 -0
  48. package/dist/storage/index.d.ts.map +1 -0
  49. package/dist/storage/index.js +6 -0
  50. package/dist/storage/index.js.map +1 -0
  51. package/dist/storage/memory.d.ts +48 -0
  52. package/dist/storage/memory.d.ts.map +1 -0
  53. package/dist/storage/memory.js +211 -0
  54. package/dist/storage/memory.js.map +1 -0
  55. package/dist/thread/drift.d.ts +43 -0
  56. package/dist/thread/drift.d.ts.map +1 -0
  57. package/dist/thread/drift.js +248 -0
  58. package/dist/thread/drift.js.map +1 -0
  59. package/dist/thread/index.d.ts +9 -0
  60. package/dist/thread/index.d.ts.map +1 -0
  61. package/dist/thread/index.js +9 -0
  62. package/dist/thread/index.js.map +1 -0
  63. package/dist/thread/intent.d.ts +68 -0
  64. package/dist/thread/intent.d.ts.map +1 -0
  65. package/dist/thread/intent.js +333 -0
  66. package/dist/thread/intent.js.map +1 -0
  67. package/dist/thread/manager.d.ts +85 -0
  68. package/dist/thread/manager.d.ts.map +1 -0
  69. package/dist/thread/manager.js +305 -0
  70. package/dist/thread/manager.js.map +1 -0
  71. package/dist/thread/weave.d.ts +61 -0
  72. package/dist/thread/weave.d.ts.map +1 -0
  73. package/dist/thread/weave.js +158 -0
  74. package/dist/thread/weave.js.map +1 -0
  75. package/dist/tools/index.d.ts +18 -0
  76. package/dist/tools/index.d.ts.map +1 -0
  77. package/dist/tools/index.js +102 -0
  78. package/dist/tools/index.js.map +1 -0
  79. package/dist/types.d.ts +466 -0
  80. package/dist/types.d.ts.map +1 -0
  81. package/dist/types.js +48 -0
  82. package/dist/types.js.map +1 -0
  83. package/package.json +24 -0
  84. package/src/anchoring/ethereum.ts +568 -0
  85. package/src/anchoring/index.ts +236 -0
  86. package/src/anchoring/merkle.ts +256 -0
  87. package/src/anchoring/solana.ts +370 -0
  88. package/src/constants.ts +566 -0
  89. package/src/index.ts +43 -0
  90. package/src/language/code-analyzer.ts +564 -0
  91. package/src/language/detector.ts +297 -0
  92. package/src/language/index.ts +129 -0
  93. package/src/language/nl-analyzer.ts +411 -0
  94. package/src/language/semantic.ts +385 -0
  95. package/src/storage/index.ts +6 -0
  96. package/src/storage/memory.ts +271 -0
  97. package/src/thread/drift.ts +319 -0
  98. package/src/thread/index.ts +9 -0
  99. package/src/thread/intent.ts +409 -0
  100. package/src/thread/manager.ts +414 -0
  101. package/src/thread/weave.ts +205 -0
  102. package/src/tools/index.ts +107 -0
  103. package/src/types.ts +736 -0
  104. package/tsconfig.json +19 -0
@@ -0,0 +1,297 @@
1
+ /**
2
+ * Dōmere - The Judge Protocol
3
+ * Language Detection
4
+ */
5
+
6
+ import type {
7
+ LanguageAnalysis,
8
+ DetectedLanguage,
9
+ LanguageSegment,
10
+ LanguageType,
11
+ } from '../types.js';
12
+ import { LANGUAGE_PATTERNS } from '../constants.js';
13
+
14
+ // ============================================================================
15
+ // Language Detector
16
+ // ============================================================================
17
+
/**
 * Heuristic language detector.
 *
 * Scores content against the per-language `patterns` and `keywords` entries of
 * `LANGUAGE_PATTERNS` and reports which languages appear to be present.
 */
export class LanguageDetector {
  /** Languages tried in the first (code / markup) detection pass. */
  private static readonly CODE_AND_MARKUP_LANGUAGES: readonly LanguageType[] = [
    'typescript', 'javascript', 'python', 'sql', 'java', 'csharp', 'go', 'rust',
    'ruby', 'php', 'swift', 'kotlin', 'scala', 'bash', 'powershell',
    'json', 'yaml', 'xml', 'html', 'css', 'markdown',
    'graphql', 'protobuf', 'regex',
  ];

  /** Languages tried in the second (natural language) detection pass. */
  private static readonly NATURAL_LANGUAGES: readonly LanguageType[] = [
    'english', 'spanish', 'french', 'german', 'chinese', 'japanese',
  ];

  /** Languages that make `containsCode` return true. */
  private static readonly PROGRAMMING_LANGUAGES: readonly LanguageType[] = [
    'javascript', 'typescript', 'python', 'sql', 'java', 'csharp', 'go', 'rust',
    'ruby', 'php', 'swift', 'kotlin', 'bash', 'powershell',
  ];

  /**
   * Detect the primary language(s) in content.
   *
   * @param content Text to inspect.
   * @returns The detected languages, the primary language (possibly `'mixed'`
   *          or `'unknown'`) and the confidence of the top candidate.
   */
  detect(content: string): LanguageAnalysis {
    const detectedLanguages = this.detectLanguages(content);
    const primary = this.determinePrimaryLanguage(detectedLanguages);

    return {
      detected_languages: detectedLanguages,
      primary_language: primary.language,
      confidence: primary.confidence,
    };
  }

  /**
   * Detect all languages present in content.
   *
   * @param content Text to inspect.
   * @returns Candidates sorted by descending confidence, where confidence is
   *          each language's share of the total score. Never empty: when
   *          nothing scores, a single `'unknown'` entry covering the whole
   *          content is returned.
   */
  detectLanguages(content: string): DetectedLanguage[] {
    const results = new Map<LanguageType, { score: number; segments: LanguageSegment[] }>();

    // First pass: code languages (more specific patterns).
    for (const lang of LanguageDetector.CODE_AND_MARKUP_LANGUAGES) {
      const detection = this.detectLanguage(content, lang);
      if (detection.score > 0.1) {
        results.set(lang, detection);
      }
    }

    // Second pass: natural languages, when no code was found or the text
    // reads like prose.
    if (results.size === 0 || this.isLikelyNaturalLanguage(content)) {
      for (const lang of LanguageDetector.NATURAL_LANGUAGES) {
        const detection = this.detectLanguage(content, lang);
        if (detection.score > 0.1) {
          // Once the accumulated score is already substantial, only accept a
          // further candidate when it is reasonably strong itself.
          const existingTotal = Array.from(results.values()).reduce((sum, d) => sum + d.score, 0);
          if (existingTotal < 0.5 || detection.score > 0.3) {
            results.set(lang, detection);
          }
        }
      }
    }

    // Normalize scores into confidences that sum to (at most) 1.
    const totalScore = Array.from(results.values()).reduce((sum, d) => sum + d.score, 0);
    const detected: DetectedLanguage[] = [];

    for (const [language, data] of results) {
      const confidence = totalScore > 0 ? data.score / totalScore : 0;
      if (confidence > 0.05) { // Only include if > 5% confidence
        detected.push({ language, confidence, segments: data.segments });
      }
    }

    detected.sort((a, b) => b.confidence - a.confidence);

    if (detected.length === 0) {
      return [{
        language: 'unknown',
        confidence: 1,
        segments: [{ start: 0, end: content.length, language: 'unknown', content, confidence: 1 }],
      }];
    }

    return detected;
  }

  /**
   * Score a single language against content.
   *
   * Every pattern match adds 0.15 and every keyword occurrence adds 0.05; the
   * total is capped at 1. Pattern matches are also returned as segments.
   *
   * @param content Text to inspect.
   * @param language Language whose `LANGUAGE_PATTERNS` entry is used.
   * @returns The capped score and the merged matched segments; score 0 and no
   *          segments when the language has no configuration.
   */
  private detectLanguage(content: string, language: LanguageType): { score: number; segments: LanguageSegment[] } {
    const config = LANGUAGE_PATTERNS[language];
    if (!config) {
      return { score: 0, segments: [] };
    }

    let score = 0;
    const segments: LanguageSegment[] = [];
    const contentLower = content.toLowerCase();

    for (const pattern of config.patterns) {
      // Work on a private copy so a stale `lastIndex` on the shared pattern
      // cannot affect the result.
      const re = new RegExp(pattern);
      const hits: RegExpMatchArray[] = [];
      if (re.global) {
        hits.push(...content.matchAll(re));
      } else {
        // A non-global pattern contributes its first match only; capture
        // groups must not be counted as additional matches.
        const first = re.exec(content);
        if (first) hits.push(first);
      }

      score += hits.length * 0.15;

      for (const hit of hits) {
        // Use the index reported by the regex engine: searching for the
        // matched text could land on an earlier occurrence that the pattern
        // itself did not match.
        const start = hit.index ?? 0;
        segments.push({
          start,
          end: start + hit[0].length,
          language,
          content: hit[0],
          confidence: 0.8,
        });
      }
    }

    for (const keyword of config.keywords) {
      const keywordLower = keyword.toLowerCase();
      if (keywordLower.length === 0) continue; // an empty keyword would match everywhere
      const matches = contentLower.match(this.keywordRegex(keywordLower));
      if (matches) {
        score += matches.length * 0.05;
      }
    }

    // Normalize score (cap at 1)
    score = Math.min(1, score);

    return { score, segments: this.mergeSegments(segments) };
  }

  /**
   * Build the whole-word matcher for a keyword.
   *
   * `\b` is only added on a side where the keyword has a word character.
   * Next to a non-word character (for example `=>`, `#include`, or CJK text)
   * `\b` would demand a word character on the other side and the keyword
   * could practically never match.
   */
  private keywordRegex(keyword: string): RegExp {
    const lead = /^\w/.test(keyword) ? '\\b' : '';
    const trail = /\w$/.test(keyword) ? '\\b' : '';
    return new RegExp(`${lead}${this.escapeRegex(keyword)}${trail}`, 'gi');
  }

  /**
   * Check if content is likely natural language (not code).
   *
   * Compares the number of sentence boundaries (terminator, whitespace,
   * capital letter) with the number of code indicators (brackets, operators
   * and a few keywords).
   */
  private isLikelyNaturalLanguage(content: string): boolean {
    const sentencePattern = /[.!?]\s+[A-Z]/g;
    const sentences = content.match(sentencePattern)?.length ?? 0;

    const codeIndicators = /[{}();=<>]|\bfunction\b|\bclass\b|\bdef\b|\bimport\b|\bexport\b/g;
    const codeMatches = content.match(codeIndicators)?.length ?? 0;

    return sentences > codeMatches || (sentences > 2 && codeMatches < 5);
  }

  /**
   * Determine the primary language.
   *
   * @param detected Candidates sorted by descending confidence.
   * @returns `'unknown'` for an empty list; `'mixed'` when the top candidate
   *          is below 0.6 and the runner-up exceeds half of it; otherwise the
   *          top candidate.
   */
  private determinePrimaryLanguage(detected: DetectedLanguage[]): { language: LanguageType; confidence: number } {
    const [top, runnerUp] = detected;
    if (!top) {
      return { language: 'unknown', confidence: 0 };
    }

    if (runnerUp && top.confidence < 0.6 && runnerUp.confidence > top.confidence * 0.5) {
      return { language: 'mixed', confidence: top.confidence };
    }

    return { language: top.language, confidence: top.confidence };
  }

  /**
   * Merge overlapping or touching segments.
   *
   * The input array is not modified; sorting happens on a copy.
   *
   * @param segments Segments in any order.
   * @returns Segments ordered by start position with overlaps collapsed.
   */
  private mergeSegments(segments: LanguageSegment[]): LanguageSegment[] {
    if (segments.length <= 1) return segments;

    const ordered = [...segments].sort((a, b) => a.start - b.start);
    const merged: LanguageSegment[] = [];
    let current = ordered[0];

    for (let i = 1; i < ordered.length; i++) {
      const next = ordered[i];

      if (next.start <= current.end) {
        current = {
          start: current.start,
          end: Math.max(current.end, next.end),
          language: current.language,
          // Append only the part of `next` that extends beyond `current`.
          content: current.content + next.content.slice(Math.max(0, current.end - next.start)),
          confidence: Math.max(current.confidence, next.confidence),
        };
      } else {
        merged.push(current);
        current = next;
      }
    }
    merged.push(current);

    return merged;
  }

  /**
   * Escape regex special characters so a string can be embedded literally.
   */
  private escapeRegex(str: string): string {
    return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  }

  /**
   * Check if content is a specific language type (score above 0.3).
   */
  isLanguage(content: string, language: LanguageType): boolean {
    return this.detectLanguage(content, language).score > 0.3;
  }

  /**
   * Check if content contains code in any of the known programming languages.
   */
  containsCode(content: string): boolean {
    return LanguageDetector.PROGRAMMING_LANGUAGES.some((lang) => this.isLanguage(content, lang));
  }

  /**
   * Extract code blocks from content.
   *
   * Fenced blocks are listed first (language taken from the fence tag, or
   * `'unknown'`), followed by indented blocks (language detected from the
   * de-indented code).
   *
   * @param content Text possibly containing fenced or indented code blocks.
   * @returns One entry per block; `start`/`end` are offsets into `content`.
   */
  extractCodeBlocks(content: string): { language: string; code: string; start: number; end: number }[] {
    const blocks: { language: string; code: string; start: number; end: number }[] = [];

    // Fenced code blocks (```language ... ```)
    const fencedRegex = /```(\w+)?\s*\n([\s\S]*?)```/g;
    for (const match of content.matchAll(fencedRegex)) {
      const start = match.index ?? 0;
      blocks.push({
        language: match[1] || 'unknown',
        code: match[2],
        start,
        end: start + match[0].length,
      });
    }
    const fencedRanges = blocks.map((b) => ({ start: b.start, end: b.end }));

    // Indented code blocks: runs of lines starting with 4 spaces or a tab.
    const indentedRegex = /(?:^|\n)((?:(?: {4}|\t).+\n?)+)/g;
    for (const match of content.matchAll(indentedRegex)) {
      const run = match[1];
      // The match may begin with the preceding newline; the block does not.
      const start = (match.index ?? 0) + (match[0].length - run.length);
      const end = start + run.length;

      // Indented lines inside a fenced block are already part of that block.
      if (fencedRanges.some((f) => start < f.end && end > f.start)) continue;

      const code = run.replace(/^(?: {4}|\t)/gm, '');
      const language = this.detectLanguages(code)[0]?.language || 'unknown';

      blocks.push({ language, code, start, end });
    }

    return blocks;
  }
}
@@ -0,0 +1,129 @@
1
+ /**
2
+ * Dōmere - The Judge Protocol
3
+ * Language Module
4
+ */
5
+
6
+ export { LanguageDetector } from './detector.js';
7
+ export { SemanticAnalyzer } from './semantic.js';
8
+ export { CodeAnalyzer } from './code-analyzer.js';
9
+ export { NLAnalyzer } from './nl-analyzer.js';
10
+
11
+ import type { LanguageAnalysis, LanguageType } from '../types.js';
12
+ import { LanguageDetector } from './detector.js';
13
+ import { SemanticAnalyzer } from './semantic.js';
14
+ import { CodeAnalyzer } from './code-analyzer.js';
15
+ import { NLAnalyzer } from './nl-analyzer.js';
16
+
17
+ // ============================================================================
18
+ // Unified Language Analyzer
19
+ // ============================================================================
20
+
21
+ export class LanguageAnalyzerService {
22
+ private detector: LanguageDetector;
23
+ private semantic: SemanticAnalyzer;
24
+ private code: CodeAnalyzer;
25
+ private nl: NLAnalyzer;
26
+
27
+ constructor() {
28
+ this.detector = new LanguageDetector();
29
+ this.semantic = new SemanticAnalyzer();
30
+ this.code = new CodeAnalyzer();
31
+ this.nl = new NLAnalyzer();
32
+ }
33
+
34
+ /**
35
+ * Perform complete language analysis
36
+ */
37
+ analyze(content: string): LanguageAnalysis {
38
+ // First detect languages
39
+ const detection = this.detector.detect(content);
40
+
41
+ // Build full analysis
42
+ const analysis: LanguageAnalysis = {
43
+ ...detection,
44
+ };
45
+
46
+ // Add semantic analysis
47
+ analysis.semantic = this.semantic.analyze(content);
48
+
49
+ // Add code analysis if code detected
50
+ const codeLanguages: LanguageType[] = [
51
+ 'javascript', 'typescript', 'python', 'sql', 'java', 'csharp', 'go', 'rust',
52
+ 'ruby', 'php', 'swift', 'kotlin', 'bash', 'powershell',
53
+ ];
54
+
55
+ if (codeLanguages.includes(detection.primary_language as LanguageType)) {
56
+ analysis.code_analysis = this.code.analyze(content, detection.primary_language as LanguageType);
57
+ }
58
+
59
+ // Add NL analysis for natural language or mixed content
60
+ const nlLanguages: LanguageType[] = ['english', 'spanish', 'french', 'german', 'chinese', 'japanese', 'mixed', 'unknown'];
61
+ if (nlLanguages.includes(detection.primary_language as LanguageType) ||
62
+ detection.detected_languages.some(d => nlLanguages.includes(d.language))) {
63
+ analysis.nl_analysis = this.nl.analyze(content);
64
+ }
65
+
66
+ return analysis;
67
+ }
68
+
69
+ /**
70
+ * Quick language detection
71
+ */
72
+ detectLanguage(content: string): { language: LanguageType; confidence: number } {
73
+ const detection = this.detector.detect(content);
74
+ return {
75
+ language: detection.primary_language as LanguageType,
76
+ confidence: detection.confidence,
77
+ };
78
+ }
79
+
80
+ /**
81
+ * Check if content contains code
82
+ */
83
+ containsCode(content: string): boolean {
84
+ return this.detector.containsCode(content);
85
+ }
86
+
87
+ /**
88
+ * Analyze code specifically
89
+ */
90
+ analyzeCode(code: string, language?: LanguageType) {
91
+ const lang = language || this.detectLanguage(code).language;
92
+ return this.code.analyze(code, lang);
93
+ }
94
+
95
+ /**
96
+ * Check for injection attempts
97
+ */
98
+ checkInjection(content: string) {
99
+ return this.nl.analyze(content);
100
+ }
101
+
102
+ /**
103
+ * Get injection risk score
104
+ */
105
+ getInjectionRisk(content: string): number {
106
+ return this.nl.getInjectionRiskScore(content);
107
+ }
108
+
109
+ /**
110
+ * Extract entities
111
+ */
112
+ extractEntities(content: string) {
113
+ return this.semantic.extractEntities(content);
114
+ }
115
+
116
+ /**
117
+ * Classify intent
118
+ */
119
+ classifyIntent(content: string) {
120
+ return this.semantic.classifyIntent(content);
121
+ }
122
+
123
+ /**
124
+ * Extract code blocks from mixed content
125
+ */
126
+ extractCodeBlocks(content: string) {
127
+ return this.detector.extractCodeBlocks(content);
128
+ }
129
+ }