aggroot 1.4.9 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,148 @@
1
+ // Test just the tokenizer - inline the key parts
2
/**
 * Builds a fresh lexer state for the given input.
 *
 * @param src - Text to be tokenized.
 * @returns A mutable state object: `src` is the input, `len` its length,
 *   and `i` the read cursor, which starts at 0.
 */
function makeLexer(src: string) {
  const state = { src, len: src.length, i: 0 }
  return state
}
5
+
6
/**
 * Looks at a character relative to the cursor without moving the cursor.
 *
 * @param L - Lexer state; reads `src`, `len` and `i`.
 * @param off - Offset from the cursor (defaults to 0, the current character).
 * @returns The character at `i + off`, or `''` when that index is not below `len`.
 */
function peek(L: any, off = 0): string {
  const at = L.i + off
  if (at < L.len) {
    return L.src[at]
  }
  return ''
}
9
+
10
/**
 * Moves the cursor forward by one position.
 *
 * No bounds check is performed; callers compare `i` against `len` themselves.
 *
 * @param L - Lexer state whose `i` field is incremented in place.
 */
function advance(L: any): void {
  L.i += 1
}
13
+
14
/**
 * Moves the cursor past insignificant input: spaces, tabs, carriage returns,
 * and line continuations (a backslash immediately followed by LF or CRLF).
 *
 * A bare newline is NOT skipped, and neither is a backslash that does not
 * begin a line continuation; the cursor stops on the first such character.
 *
 * @param L - Lexer state; `i` is updated in place.
 */
function skipBlanks(L: any): void {
  for (;;) {
    if (L.i >= L.len) return

    const ch = L.src[L.i]
    if (ch === ' ' || ch === '\t' || ch === '\r') {
      advance(L)
      continue
    }
    if (ch !== '\\') return

    const next = L.src[L.i + 1]
    const endsWithLf = next === '\n'
    const endsWithCrLf = next === '\r' && L.src[L.i + 2] === '\n'
    if (!endsWithLf && !endsWithCrLf) return

    // Drop the backslash and the first character of the line ending...
    advance(L)
    advance(L)
    // ...and for CRLF also the LF that follows the CR.
    if (endsWithCrLf) advance(L)
  }
}
27
+
28
/**
 * Tells whether `c` may appear inside an unquoted word.
 *
 * Accepted: ASCII letters and digits, plus the punctuation
 * `_ / . - + : @ % , ~ ^ ? * ! = [ ]`.
 *
 * @param c - The character to classify.
 * @returns `true` when `c` is a word character, `false` otherwise.
 */
function isWordChar(c: string): boolean {
  const isLower = c >= 'a' && c <= 'z'
  const isUpper = c >= 'A' && c <= 'Z'
  const isDigit = c >= '0' && c <= '9'
  if (isLower || isUpper || isDigit) return true

  switch (c) {
    case '_':
    case '/':
    case '.':
    case '-':
    case '+':
    case ':':
    case '@':
    case '%':
    case ',':
    case '~':
    case '^':
    case '?':
    case '*':
    case '!':
    case '=':
    case '[':
    case ']':
      return true
    default:
      return false
  }
}
37
+
38
/**
 * Tells whether `c` may begin a word: a backslash or any character that
 * `isWordChar` accepts.
 *
 * @param c - The character to classify.
 * @returns `true` when a word may start with `c`.
 */
function isWordStart(c: string): boolean {
  if (c === '\\') return true
  return isWordChar(c)
}
41
+
42
/**
 * Consumes the literal `text` sitting at the cursor and wraps it in a token.
 * The caller must already have checked that `text` is what the input holds.
 */
function takeFixed(L: any, type: string, text: string): any {
  const start = L.i
  for (let k = 0; k < text.length; k++) advance(L)
  return { type, value: text, start, end: L.i }
}

/**
 * Consumes a run that opens with the delimiter at the cursor and extends to
 * the next occurrence of `quote`. The closing delimiter is included when it
 * exists; an unterminated run extends to the end of the input.
 */
function takeDelimited(L: any, type: string, quote: string): any {
  const start = L.i
  advance(L) // opening delimiter
  while (L.i < L.len && L.src[L.i] !== quote) advance(L)
  if (L.i < L.len) advance(L) // closing delimiter
  return { type, value: L.src.slice(start, L.i), start, end: L.i }
}

/**
 * Reads the next token from the lexer state, moving the cursor past it.
 *
 * Leading blanks and line continuations are skipped first. Every token is an
 * object `{ type, value, start, end }`, where `start`/`end` are cursor
 * positions and `type` is one of `EOF`, `NEWLINE`, `COMMENT`, `OP`,
 * `LT_PAREN`, `GT_PAREN`, `DQUOTE`, `SQUOTE`, `DOLLAR`, `DOLLAR_PAREN`,
 * `DOLLAR_DPAREN`, `DOLLAR_BRACE`, `BACKTICK` or `WORD`.
 *
 * Every token other than `EOF` consumes at least one character, so calling
 * this function in a loop always terminates. In particular a backslash is
 * consumed together with the character it escapes and becomes part of the
 * surrounding `WORD`. Previously a backslash at the cursor yielded an empty
 * `WORD` and left the cursor in place, so callers looped forever.
 *
 * @param L - Lexer state; `i` is updated in place.
 * @param ctx - Lexing context. Only `'cmd'` is special: there `[[`, `[`, `}`,
 *   `{` followed by a blank or newline, and `!` followed by a blank, are
 *   returned as `OP` tokens instead of being lexed as word text.
 * @returns The token found at the cursor, or an `EOF` token at end of input.
 */
function nextToken(L: any, ctx: string = 'arg'): any {
  skipBlanks(L)
  const start = L.i
  if (L.i >= L.len) return { type: 'EOF', value: '', start, end: start }

  const c = L.src[L.i]
  const c1 = peek(L, 1)
  const c2 = peek(L, 2)

  if (c === '\n') return takeFixed(L, 'NEWLINE', '\n')
  if (c === '#') {
    // A comment runs up to, but does not include, the end of the line.
    while (L.i < L.len && L.src[L.i] !== '\n') advance(L)
    return { type: 'COMMENT', value: L.src.slice(start, L.i), start, end: L.i }
  }

  // Multi-character operators; a longer spelling is tested before any
  // shorter one that is its prefix (`&>>` before `&>`, `<<<`/`<<-` before `<<`).
  if (c === '&' && c1 === '&') return takeFixed(L, 'OP', '&&')
  if (c === '|' && c1 === '|') return takeFixed(L, 'OP', '||')
  if (c === '|' && c1 === '&') return takeFixed(L, 'OP', '|&')
  if (c === '>' && c1 === '>') return takeFixed(L, 'OP', '>>')
  if (c === '>' && c1 === '&') return takeFixed(L, 'OP', '>&')
  if (c === '>' && c1 === '|') return takeFixed(L, 'OP', '>|')
  if (c === '&' && c1 === '>' && c2 === '>') return takeFixed(L, 'OP', '&>>')
  if (c === '&' && c1 === '>') return takeFixed(L, 'OP', '&>')
  if (c === '<' && c1 === '<' && c2 === '<') return takeFixed(L, 'OP', '<<<')
  if (c === '<' && c1 === '<' && c2 === '-') return takeFixed(L, 'OP', '<<-')
  if (c === '<' && c1 === '<') return takeFixed(L, 'OP', '<<')
  if (c === '<' && c1 === '&') return takeFixed(L, 'OP', '<&')
  if (c === '<' && c1 === '(') return takeFixed(L, 'LT_PAREN', '<(')
  if (c === '>' && c1 === '(') return takeFixed(L, 'GT_PAREN', '>(')
  if (c === '(' && c1 === '(') return takeFixed(L, 'OP', '((')
  if (c === ')' && c1 === ')') return takeFixed(L, 'OP', '))')

  // Single-character operators.
  if (c === '|' || c === '&' || c === ';' || c === '>' || c === '<' || c === '(' || c === ')') {
    return takeFixed(L, 'OP', c)
  }

  // In cmd position, [ [[ { start test/group
  if (ctx === 'cmd') {
    if (c === '[' && c1 === '[') return takeFixed(L, 'OP', '[[')
    if (c === '[') return takeFixed(L, 'OP', '[')
    if (c === '{' && (c1 === ' ' || c1 === '\t' || c1 === '\n')) return takeFixed(L, 'OP', '{')
    if (c === '}') return takeFixed(L, 'OP', '}')
    if (c === '!' && (c1 === ' ' || c1 === '\t')) return takeFixed(L, 'OP', '!')
  }

  // Quotes: only the opening double quote is consumed here, whereas single
  // quotes and backticks are consumed through their closing delimiter.
  if (c === '"') return takeFixed(L, 'DQUOTE', '"')
  if (c === "'") return takeDelimited(L, 'SQUOTE', "'")
  if (c === '$') {
    if (c1 === '(' && c2 === '(') return takeFixed(L, 'DOLLAR_DPAREN', '$((')
    if (c1 === '(') return takeFixed(L, 'DOLLAR_PAREN', '$(')
    if (c1 === '{') return takeFixed(L, 'DOLLAR_BRACE', '${')
    return takeFixed(L, 'DOLLAR', '$')
  }
  if (c === '`') return takeDelimited(L, 'BACKTICK', '`')

  // Word
  if (isWordStart(c)) {
    while (L.i < L.len) {
      const ch = L.src[L.i]
      if (ch === '\\') {
        const n1 = peek(L, 1)
        const isContinuation = n1 === '\n' || (n1 === '\r' && peek(L, 2) === '\n')
        // A line continuation inside a word ends the word; skipBlanks drops
        // it on the next call. The `L.i > start` guard keeps a continuation
        // at the very first character from producing an empty token.
        if (isContinuation && L.i > start) break
        advance(L) // the backslash itself
        if (L.i < L.len) advance(L) // the escaped character, if there is one
      } else if (isWordChar(ch)) {
        advance(L)
      } else {
        break
      }
    }
    return { type: 'WORD', value: L.src.slice(start, L.i), start, end: L.i }
  }

  // Fallback: single char as word
  advance(L)
  return { type: 'WORD', value: c, start, end: L.i }
}
124
+
125
// Smoke test: run the tokenizer over one sample command and dump every token.
const cmd = 'cd /c/Users/zhuoz/Desktop/gui && ls -la build* 2>/dev/null || echo "No build directory found"';
const L = makeLexer(cmd);
const tokens: any[] = [];
let count = 0;
const start = Date.now();

// Pull tokens until EOF. Two safety nets guard against a lexer that stops
// making progress: the loop gives up once more than 100 tokens have been
// read, and it aborts the process after 5 seconds of wall-clock time.
for (;;) {
  const tok = nextToken(L);
  tokens.push(tok);
  count += 1;

  const reachedEnd = tok.type === 'EOF';
  if (reachedEnd || count > 100) {
    break;
  }

  const elapsedMs = Date.now() - start;
  if (elapsedMs > 5000) {
    const remaining = cmd.slice(L.i, L.i + 50);
    console.log('TOKENIZER TIMEOUT at position', L.i, 'of', L.len);
    console.log('Last token:', JSON.stringify(tok));
    console.log('Remaining:', JSON.stringify(remaining));
    process.exit(1);
  }
}

// Summary line followed by one line per token: its type and JSON-quoted value.
console.log('Tokenization complete:', count, 'tokens in', Date.now() - start, 'ms');
tokens.forEach((t) => {
  console.log(' ', t.type, JSON.stringify(t.value));
});