@blanklogic/refinery-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -0
- package/package.json +32 -0
- package/src/index.js +36 -0
- package/src/refineryCompiler.js +263 -0
- package/src/refinerySanitizer.js +171 -0
package/README.md
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# BlankLogic Refinery Core
|
|
2
|
+
|
|
3
|
+
Shared local engine package for BlankLogic Refinery.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
npm install @blanklogic/refinery-core
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## API
|
|
10
|
+
|
|
11
|
+
```js
|
|
12
|
+
import { refineFull, runCodeDebloater, runTokenSanitizer } from '@blanklogic/refinery-core'
|
|
13
|
+
|
|
14
|
+
const cleaned = refineFull(input)
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
- `runCodeDebloater(input)` removes tracker noise, comments, and boilerplate while preserving useful structure.
|
|
18
|
+
- `runTokenSanitizer(input)` redacts obvious secrets and compacts text for LLM context windows.
|
|
19
|
+
- `refineFull(input)` runs de-bloat first, then secret sanitization.
|
|
20
|
+
|
|
21
|
+
This package is local-only. It does not call Stripe, BlankLogic servers, Ollama, or third-party APIs.
|
package/package.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@blanklogic/refinery-core",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Shared BlankLogic Refinery context hygiene engines.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"private": false,
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/index.js"
|
|
9
|
+
},
|
|
10
|
+
"files": [
|
|
11
|
+
"src"
|
|
12
|
+
],
|
|
13
|
+
"publishConfig": {
|
|
14
|
+
"access": "public"
|
|
15
|
+
},
|
|
16
|
+
"license": "UNLICENSED",
|
|
17
|
+
"repository": {
|
|
18
|
+
"type": "git",
|
|
19
|
+
"url": "git+https://github.com/briancrabtree-me/blanklogic-site.git",
|
|
20
|
+
"directory": "packages/core"
|
|
21
|
+
},
|
|
22
|
+
"keywords": [
|
|
23
|
+
"blanklogic",
|
|
24
|
+
"refinery",
|
|
25
|
+
"llm",
|
|
26
|
+
"context",
|
|
27
|
+
"sanitizer"
|
|
28
|
+
],
|
|
29
|
+
"engines": {
|
|
30
|
+
"node": ">=20"
|
|
31
|
+
}
|
|
32
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { runCodeDebloater, roughTokenCount, byteSize, lineCount } from './refineryCompiler.js'
|
|
2
|
+
import { runTokenSanitizer } from './refinerySanitizer.js'
|
|
3
|
+
|
|
4
|
+
export { runCodeDebloater, roughTokenCount, byteSize, lineCount, runTokenSanitizer }
|
|
5
|
+
|
|
6
|
+
/**
 * Runs the complete refinery pipeline: the code de-bloater first, then the
 * token sanitizer on the de-bloated text.
 *
 * @param {*} inputValue - Raw text. `null`/`undefined` become the empty
 *   string; any other value is coerced with `String()`.
 * @returns {{ output: string, stats: object }} The sanitized text plus byte,
 *   token, line and removal counters collected from both stages.
 */
export function refineFull(inputValue) {
  const source = String(inputValue ?? '')
  const debloatStage = runCodeDebloater(source)
  const sanitizeStage = runTokenSanitizer(debloatStage.output)
  const finalText = sanitizeStage.output

  const bytesIn = byteSize(source)
  const bytesOut = byteSize(finalText)
  const tokensBefore = roughTokenCount(source)
  const tokensAfter = roughTokenCount(finalText)
  const linesBefore = lineCount(source)
  const linesAfter = lineCount(finalText)

  // Percentage of bytes removed, clamped so growth never reports a negative value.
  let reduction = 0
  if (bytesIn > 0) {
    reduction = Math.max(0, Math.round((1 - bytesOut / bytesIn) * 100))
  }

  const stats = {
    bytesIn,
    bytesOut,
    tokensBefore,
    tokensAfter,
    tokensSaved: Math.max(tokensBefore - tokensAfter, 0),
    linesBefore,
    linesAfter,
    reduction,
    trackersRemoved: debloatStage.trackersRemoved,
    htmlCommentsRemoved: debloatStage.htmlCommentsRemoved,
    scriptCommentsRemoved: debloatStage.scriptCommentsRemoved,
    styleCommentsRemoved: debloatStage.styleCommentsRemoved,
    secretsRedacted: sanitizeStage.secretsRedacted,
    commentsRemoved: sanitizeStage.commentsRemoved,
    domSafe: debloatStage.domSafe,
    ms: debloatStage.ms + sanitizeStage.ms,
  }

  return { output: finalText, stats }
}
|
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
// Matches one complete <script>…</script> or <noscript>…</noscript> element
// (case-insensitive, non-greedy body). Used to find candidate tracker blocks.
const TRACKER_BLOCK_RE = /<(script|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi

// A candidate block is dropped as a tracker when its text mentions any of these names.
// No `g` flag on purpose: the pattern is used with `.test()`, which must stay stateless.
const TRACKER_VENDOR_RE = /google-analytics|googletagmanager|gtag|hotjar|segment|mixpanel|facebook|fbq|clarity/i

// Elements whose inner text must not be whitespace-compacted as ordinary markup.
const RAW_BLOCK_RE = /<(script|style|pre|code|textarea)\b[^>]*>[\s\S]*?<\/\1>/gi
|
|
7
|
+
|
|
8
|
+
/**
 * Removes tracker blocks and comments from `inputValue` and compacts its
 * whitespace, choosing the HTML path when the text contains something that
 * looks like a tag and the loose-code path otherwise.
 *
 * @param {*} inputValue - Raw text. `null`/`undefined` become the empty
 *   string; any other value is coerced with `String()`.
 * @returns {object} The trimmed `output`, elapsed `ms`, size/token/line
 *   figures measured against the un-normalized input, the per-kind removal
 *   counters, and `domSafe` (true when the HTML path was taken).
 */
export function runCodeDebloater(inputValue) {
  const startedAt = now()
  const input = String(inputValue ?? '')
  // Line endings are unified before any pattern runs; stats still use `input`.
  const normalized = input.replace(/\r\n?/g, '\n')
  const htmlLike = /<\/?[a-z][\s\S]*>/i.test(normalized)

  let compacted
  if (htmlLike) {
    compacted = compactHtml(normalized)
  } else {
    compacted = compactLooseCode(normalized)
  }

  const counters = compacted.stats
  const output = compacted.output.trim()

  return {
    output,
    ms: now() - startedAt,
    reduction: reductionPercent(input, output),
    tokensSaved: Math.max(roughTokenCount(input) - roughTokenCount(output), 0),
    bytesIn: byteSize(input),
    bytesOut: byteSize(output),
    linesBefore: lineCount(input),
    linesAfter: lineCount(output),
    trackersRemoved: counters.trackersRemoved,
    htmlCommentsRemoved: counters.htmlCommentsRemoved,
    scriptCommentsRemoved: counters.scriptCommentsRemoved,
    styleCommentsRemoved: counters.styleCommentsRemoved,
    domSafe: htmlLike,
  }
}
|
|
32
|
+
|
|
33
|
+
/**
 * Compacts HTML-like text in five steps: drop tracker script/noscript blocks,
 * park raw blocks (script/style/pre/code/textarea) behind NUL-delimited
 * placeholders, delete HTML comments, squeeze whitespace in the remaining
 * tags and text, then put the parked blocks back.
 *
 * @param {string} input - Text with line endings already normalized to `\n`.
 * @returns {{ output: string, stats: object }} Compacted markup and counters.
 */
function compactHtml(input) {
  const stats = emptyStats()
  const parkedBlocks = []

  // Step 1: remove script/noscript elements that mention a known tracker vendor.
  const withoutTrackers = input.replace(TRACKER_BLOCK_RE, (block) => {
    if (TRACKER_VENDOR_RE.test(block)) {
      stats.trackersRemoved += 1
      return ''
    }
    return block
  })

  // Step 2: park raw blocks so later whitespace passes cannot touch their bodies.
  // Script and style bodies get their comments stripped before being parked.
  const withPlaceholders = withoutTrackers.replace(RAW_BLOCK_RE, (block, tagName) => {
    const tag = String(tagName).toLowerCase()
    let kept = block
    if (tag === 'script') {
      const processed = processScriptBlock(block)
      stats.scriptCommentsRemoved += processed.commentsRemoved
      kept = processed.output
    } else if (tag === 'style') {
      const processed = processStyleBlock(block)
      stats.styleCommentsRemoved += processed.commentsRemoved
      kept = processed.output
    }
    const placeholder = `\u0000REFINERY_BLOCK_${parkedBlocks.length}\u0000`
    parkedBlocks.push(kept)
    return placeholder
  })

  // Step 3: delete HTML comments outside the parked blocks.
  const withoutComments = withPlaceholders.replace(/<!--[\s\S]*?-->/g, () => {
    stats.htmlCommentsRemoved += 1
    return ''
  })

  // Step 4: split into tags, placeholders and text nodes, and compact each kind.
  const pieces = []
  for (const part of withoutComments.split(/(<[^>]+>|\u0000REFINERY_BLOCK_\d+\u0000)/g)) {
    if (!part) continue
    if (part.startsWith('\u0000REFINERY_BLOCK_')) {
      pieces.push(part)
    } else if (part.startsWith('<')) {
      pieces.push(part.replace(/\s{2,}/g, ' ').trim())
    } else {
      pieces.push(compactTextNode(part))
    }
  }
  const compacted = pieces
    .join('')
    .replace(/>\s+</g, '><')
    .replace(/\n{3,}/g, '\n\n')

  // Step 5: restore parked blocks; a function replacer keeps `$` in bodies literal.
  const output = compacted.replace(
    /\u0000REFINERY_BLOCK_(\d+)\u0000/g,
    (_marker, index) => parkedBlocks[Number(index)] || '',
  )

  return { output, stats }
}
|
|
79
|
+
|
|
80
|
+
/**
 * Compacts text that was not recognized as HTML: removes HTML comments and
 * JS-style comments, collapses runs of spaces/tabs, and limits blank lines.
 *
 * Only the JS comment stripper runs here. It already removes every block
 * comment that sits outside a string, and it understands template literals.
 * A second pass with the CSS stripper (which does not treat backticks as
 * quotes) could only delete comment-like text inside strings, e.g. turning
 * the glob in a template literal `src/**` + `/*.js` into `src*.js`.
 *
 * @param {string} input - Text with line endings already normalized to `\n`.
 * @returns {{ output: string, stats: object }} Compacted text and counters;
 *   block comments are reported under `scriptCommentsRemoved`, and
 *   `styleCommentsRemoved` stays 0 on this path.
 */
function compactLooseCode(input) {
  const stats = emptyStats()

  const withoutHtmlComments = input.replace(/<!--[\s\S]*?-->/g, () => {
    stats.htmlCommentsRemoved += 1
    return ''
  })

  const script = stripJsComments(withoutHtmlComments)
  stats.scriptCommentsRemoved = script.commentsRemoved

  const output = script.output
    .replace(/[ \t]{2,}/g, ' ')
    .replace(/\n{3,}/g, '\n\n')
    .trim()

  return { output, stats }
}
|
|
98
|
+
|
|
99
|
+
/**
 * Strips JS comments from the body of one `<script>` element while leaving
 * its opening and closing tags untouched.
 *
 * @param {string} block - A whole `<script …>…</script>` element.
 * @returns {{ output: string, commentsRemoved: number }} The rebuilt element
 *   (body trimmed) and the number of comments removed.
 */
function processScriptBlock(block) {
  const { open, body, close } = splitRawBlock(block)
  const { output: code, commentsRemoved } = stripJsComments(body)
  return {
    output: open + code.trim() + close,
    commentsRemoved,
  }
}
|
|
107
|
+
|
|
108
|
+
/**
 * Strips CSS comments from the body of one `<style>` element while leaving
 * its opening and closing tags untouched.
 *
 * @param {string} block - A whole `<style …>…</style>` element.
 * @returns {{ output: string, commentsRemoved: number }} The rebuilt element
 *   (body trimmed) and the number of comments removed.
 */
function processStyleBlock(block) {
  const { open, body, close } = splitRawBlock(block)
  const { output: css, commentsRemoved } = stripCssComments(body)
  return {
    output: open + css.trim() + close,
    commentsRemoved,
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Splits a raw element into its opening tag, inner body and closing tag.
 *
 * @param {string} block - Element text such as `<script>…</script>`.
 * @returns {{ open: string, body: string, close: string }} `open` is the
 *   leading tag (or ''), `close` is the trailing `</name>` including any
 *   whitespace after it (or ''), and `body` is everything in between.
 */
function splitRawBlock(block) {
  const opening = /^<[^>]+>/.exec(block)
  const closing = /<\/[a-z]+>\s*$/i.exec(block)

  const open = opening ? opening[0] : ''
  const close = closing ? closing[0] : ''
  const bodyEnd = block.length - close.length

  return { open, body: block.slice(open.length, bodyEnd), close }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Removes block comments from CSS text, leaving comment-like sequences inside
 * single- or double-quoted strings untouched.
 *
 * A quoted string ends at its matching quote or at an unescaped line break.
 * Without the line-break rule a single stray quote would keep the scanner in
 * "string" mode and silently disable comment removal for the rest of the input.
 *
 * @param {string} input - CSS source text.
 * @returns {{ output: string, commentsRemoved: number }} Text without
 *   comments and the number of comments removed. An unterminated comment is
 *   removed through the end of the input and counted once.
 */
function stripCssComments(input) {
  let output = ''
  let commentsRemoved = 0
  let quote = ''
  let escaped = false

  for (let i = 0; i < input.length; i += 1) {
    const ch = input[i]

    if (quote) {
      output += ch
      if (escaped) {
        // The character after a backslash never ends the string (this also
        // covers a backslash-newline continuation).
        escaped = false
      } else if (ch === '\\') {
        escaped = true
      } else if (ch === quote || ch === '\n' || ch === '\r') {
        quote = ''
      }
      continue
    }

    if (ch === '"' || ch === "'") {
      quote = ch
      output += ch
      continue
    }

    if (ch === '/' && input[i + 1] === '*') {
      commentsRemoved += 1
      const closeAt = input.indexOf('*/', i + 2)
      // Park `i` on the final '/' of the comment; the loop increment steps past it.
      i = closeAt === -1 ? input.length : closeAt + 1
      continue
    }

    output += ch
  }

  return { output, commentsRemoved }
}
|
|
167
|
+
|
|
168
|
+
/**
 * Removes `//` line comments and block comments from JS-like text, leaving
 * comment-like sequences inside quoted strings and template literals alone.
 *
 * A `//` is treated as a comment only where `isSlashCommentStart` accepts the
 * text emitted so far, which keeps sequences such as `http://` intact.
 *
 * Single- and double-quoted strings end at their matching quote or at an
 * unescaped line break; template literals may span lines. Without the
 * line-break rule one stray apostrophe would keep the scanner in "string"
 * mode and disable comment removal for the rest of the input.
 *
 * @param {string} input - Source text.
 * @returns {{ output: string, commentsRemoved: number }} Text without
 *   comments and the number of comments removed. The line break that ends a
 *   line comment is kept.
 */
function stripJsComments(input) {
  let output = ''
  let commentsRemoved = 0
  let quote = ''
  let escaped = false

  for (let i = 0; i < input.length; i += 1) {
    const ch = input[i]
    const next = input[i + 1]

    if (quote) {
      output += ch
      if (escaped) {
        escaped = false
      } else if (ch === '\\') {
        escaped = true
      } else if (ch === quote) {
        quote = ''
      } else if (quote !== '`' && (ch === '\n' || ch === '\r')) {
        // Unterminated '…' or "…": stop treating the following lines as string content.
        quote = ''
      }
      continue
    }

    if (ch === '"' || ch === "'" || ch === '`') {
      quote = ch
      output += ch
      continue
    }

    if (ch === '/' && next === '/' && isSlashCommentStart(output)) {
      commentsRemoved += 1
      while (i < input.length && input[i] !== '\n' && input[i] !== '\r') i += 1
      // Keep the line terminator (nothing to keep at end of input).
      output += input[i] || ''
      continue
    }

    if (ch === '/' && next === '*') {
      commentsRemoved += 1
      const closeAt = input.indexOf('*/', i + 2)
      // Park `i` on the final '/' of the comment; the loop increment steps past it.
      i = closeAt === -1 ? input.length : closeAt + 1
      continue
    }

    output += ch
  }

  return { output, commentsRemoved }
}
|
|
216
|
+
|
|
217
|
+
/**
 * Decides whether a `//` found right after `output` begins a line comment.
 *
 * Trailing spaces and tabs are ignored; the `//` counts as a comment only at
 * the very start of the text or directly after a line break, `;`, `{` or `}`.
 *
 * Scans backwards from the end instead of copying the accumulated text, so a
 * file with many `//` sequences is not rescanned in full for each of them.
 *
 * @param {string} output - Text emitted so far by the comment stripper.
 * @returns {boolean} True when a `//` at this position should be stripped.
 */
function isSlashCommentStart(output) {
  let last = output.length - 1
  while (last >= 0 && (output[last] === ' ' || output[last] === '\t')) last -= 1
  if (last < 0) return true

  const prev = output[last]
  return prev === '\n' || prev === '\r' || prev === ';' || prev === '{' || prev === '}'
}
|
|
222
|
+
|
|
223
|
+
/**
 * Compacts the whitespace of one HTML text node.
 *
 * @param {string} text - Text found between tags.
 * @returns {string} The text with runs of spaces/tabs collapsed to one space
 *   and every line break (with its surrounding whitespace) turned into a
 *   single space.
 */
function compactTextNode(text) {
  const singleSpaced = text.replace(/[ \t]{2,}/g, ' ')
  const singleLine = singleSpaced.replace(/\s*\n\s*/g, ' ')
  return singleLine
}
|
|
228
|
+
|
|
229
|
+
/**
 * Creates a fresh counter object for one de-bloat run.
 *
 * @returns {{ trackersRemoved: number, htmlCommentsRemoved: number,
 *   scriptCommentsRemoved: number, styleCommentsRemoved: number }} A new
 *   object on every call, with all counters at zero.
 */
function emptyStats() {
  const counterNames = [
    'trackersRemoved',
    'htmlCommentsRemoved',
    'scriptCommentsRemoved',
    'styleCommentsRemoved',
  ]
  return Object.fromEntries(counterNames.map((name) => [name, 0]))
}
|
|
237
|
+
|
|
238
|
+
/**
 * Estimates a token count by splitting on whitespace and common punctuation
 * and counting the non-empty pieces. This is a word-like approximation, not
 * the output of a real tokenizer.
 *
 * @param {*} value - Text to measure. `null`/`undefined` count as empty text
 *   (instead of the words "null"/"undefined"); other values are coerced with
 *   `String()`.
 * @returns {number} Number of non-empty pieces.
 */
export function roughTokenCount(value) {
  const text = String(value ?? '').trim()
  const pieces = text.split(/[\s{}[\]():;,.'"`<>/\\|+=*-]+/)
  return pieces.filter(Boolean).length
}
|
|
244
|
+
|
|
245
|
+
/**
 * Measures the UTF-8 encoded size of a value's string form.
 *
 * @param {*} value - Text to measure. `null`/`undefined` count as empty text
 *   (instead of the 4/9 bytes of "null"/"undefined"); other values are
 *   coerced with `String()`.
 * @returns {number} Size in bytes.
 */
export function byteSize(value) {
  const text = String(value ?? '')
  if (typeof TextEncoder !== 'undefined') return new TextEncoder().encode(text).length
  // Fallback for runtimes without a global TextEncoder.
  return Buffer.byteLength(text, 'utf8')
}
|
|
249
|
+
|
|
250
|
+
/**
 * Counts the lines in a value's string form, accepting `\n`, `\r\n` and `\r`
 * as line breaks.
 *
 * @param {*} value - Text to measure. Only `null`/`undefined` are treated as
 *   empty; other values (including `0` and `false`) are coerced with
 *   `String()` so they count like any other one-line text.
 * @returns {number} 0 for empty text, otherwise the number of line breaks
 *   plus one (so text ending in a line break counts a final empty line).
 */
export function lineCount(value) {
  const text = String(value ?? '')
  if (text.length === 0) return 0
  return text.replace(/\r\n?/g, '\n').split('\n').length
}
|
|
254
|
+
|
|
255
|
+
/**
 * Computes how much smaller `output` is than `input`, as a whole percentage
 * of bytes.
 *
 * @param {string} input - Original text.
 * @param {string} output - Processed text.
 * @returns {number} Rounded percentage of bytes removed; 0 when the input is
 *   empty or the output is not smaller.
 */
function reductionPercent(input, output) {
  const originalBytes = byteSize(input)
  const remainingBytes = byteSize(output)
  if (!(originalBytes > 0)) return 0

  const removedShare = 1 - remainingBytes / originalBytes
  return Math.max(0, Math.round(removedShare * 100))
}
|
|
260
|
+
|
|
261
|
+
/**
 * Reads a millisecond timestamp for measuring elapsed time.
 *
 * @returns {number} `performance.now()` when that API exists, otherwise
 *   `Date.now()`.
 */
function now() {
  if (typeof performance !== 'undefined' && performance.now) {
    return performance.now()
  }
  return Date.now()
}
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
// Ordered list of { pattern, replacement } rules for standalone secrets.
// Every pattern is global. The last two rules skip values that already start
// with "[REDACTED", so text redacted by an earlier rule is not matched again.
const SECRET_PATTERNS = [
  {
    // Hyphenated "sk-…" keys.
    // NOTE(review): this shape looks like OpenAI-style keys rather than Stripe;
    // the label is kept unchanged so existing output stays stable.
    pattern: /\bsk-[A-Za-z0-9_-]{12,}\b/g,
    replacement: '[REDACTED_STRIPE_SECRET]',
  },
  {
    // Stripe secret (sk_) and restricted (rk_) keys use underscores and a
    // live/test mode segment, which the hyphenated rule above never matches.
    pattern: /\b(?:sk|rk)_(?:live|test)_[A-Za-z0-9]{12,}\b/g,
    replacement: '[REDACTED_STRIPE_SECRET]',
  },
  {
    // Stripe publishable keys.
    pattern: /\bpk_(live|test)_[A-Za-z0-9]{12,}\b/g,
    replacement: '[REDACTED_STRIPE_PUBLIC]',
  },
  {
    // Google API keys ("AIza" prefix).
    pattern: /\bAIza[0-9A-Za-z_-]{20,}\b/g,
    replacement: '[REDACTED_GOOGLE_API_KEY]',
  },
  {
    // GitHub tokens (ghp_, github_pat_, gho_, ghu_, ghs_).
    pattern: /\b(?:ghp|github_pat|gho|ghu|ghs)_[A-Za-z0-9_]{20,}\b/g,
    replacement: '[REDACTED_GITHUB_TOKEN]',
  },
  {
    // Three long dot-separated base64url-looking segments.
    pattern: /\b[A-Za-z0-9_-]{24,}\.[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{20,}\b/g,
    replacement: '[REDACTED_JWT]',
  },
  {
    // "Authorization: Bearer <value>" — the header prefix ($1) is kept.
    pattern: /\b(Authorization\s*:\s*Bearer\s+)(?!\[REDACTED)[^\s,;&]+/gi,
    replacement: '$1[REDACTED_BEARER]',
  },
  {
    // Any name containing api key / token / secret / password followed by ':' or '='.
    pattern: /\b([A-Za-z0-9_-]*(?:api[_-]?key|token|secret|password)[A-Za-z0-9_-]*)\s*[:=]\s*["']?(?!\[REDACTED)[^"'\s,;&]+["']?/gi,
    replacement: '$1=[REDACTED]',
  },
]
|
|
31
|
+
|
|
32
|
+
// Matches `<name> : <value>` or `<name> = <value>` where the name contains
// api key / token / secret / password. Groups:
//   1  the character before the name (start of text, '{', whitespace, ',' or ';')
//   2  the name, optional quotes around it, and the separator
//   3  the quote of a fully quoted value that closes on the same line
//   4  the optional opening quote of a bare value (fallback form)
// The fully quoted form is tried first so a value containing spaces or commas
// is consumed up to its closing quote; the bare form alone would stop at the
// first space and leave the rest of the secret in the text. Values already
// replaced by "[REDACTED]" are skipped by both forms.
const NAMED_SECRET_ASSIGNMENT_RE =
  /(^|[{\s,;])(["']?[A-Za-z0-9_-]*(?:api[_-]?key|token|secret|password)[A-Za-z0-9_-]*["']?\s*[:=]\s*)(?:(["'])(?!\[REDACTED\])(?:\\.|(?!\3)[^\\\n])+\3|(["']?)(?!\[REDACTED\])[^"'\s,;&}]+["']?)/gi

/**
 * Redacts secrets, removes comments and compacts text onto a single line.
 *
 * Order of work: named secret assignments, then each `SECRET_PATTERNS` rule,
 * then triple-quoted blocks, HTML comments and `#` line comments, then slash
 * comments, and finally whitespace compaction (lines are trimmed, empty lines
 * dropped, the rest joined with single spaces, and whitespace around common
 * punctuation removed).
 *
 * @param {*} inputValue - Raw text. `null`/`undefined` become the empty
 *   string; any other value is coerced with `String()`.
 * @returns {{ output: string, ms: number, secretsRedacted: number,
 *   commentsRemoved: number, tokensSaved: number, bytesIn: number,
 *   bytesOut: number }} The compacted text and run statistics.
 */
export function runTokenSanitizer(inputValue) {
  const start = now()
  const input = String(inputValue ?? '')
  const normalized = input.replace(/\r\n?/g, '\n')
  let secretsRedacted = 0

  let protectedText = normalized.replace(
    NAMED_SECRET_ASSIGNMENT_RE,
    (_match, prefix, assignment, closedQuote, looseQuote) => {
      secretsRedacted += 1
      // Exactly one of the two quote groups takes part in a match; the other is undefined.
      const quote = closedQuote ?? looseQuote ?? ''
      return `${prefix}${assignment}${quote}[REDACTED]${quote}`
    },
  )

  for (const item of SECRET_PATTERNS) {
    protectedText = protectedText.replace(item.pattern, (match) => {
      secretsRedacted += 1
      return replaceSingle(match, item.pattern, item.replacement)
    })
  }

  let commentsRemoved = 0
  let working = protectedText
    .replace(/("""|''')[\s\S]*?\1/g, () => {
      commentsRemoved += 1
      return ''
    })
    .replace(/<!--[\s\S]*?-->/g, () => {
      commentsRemoved += 1
      return ''
    })
    // '#' line comments; a '#' followed by 3-8 hex digits (a color) is left alone.
    .replace(/(^|\n)[ \t]*#(?![A-Fa-f0-9]{3,8}\b)[^\n\r]*/g, (_match, prefix) => {
      commentsRemoved += 1
      return prefix
    })

  const slash = stripSlashComments(working)
  commentsRemoved += slash.commentsRemoved
  working = slash.output

  const output = working
    .replace(/```[\s\S]*?```/g, (block) => block.replace(/\n{2,}/g, '\n'))
    .split('\n')
    .map((line) => line.trim())
    .filter(Boolean)
    .join(' ')
    .replace(/\s{2,}/g, ' ')
    .replace(/\s*([{}[\]():;,=+\-*<>|])\s*/g, '$1')
    .trim()

  return {
    output,
    ms: now() - start,
    secretsRedacted,
    commentsRemoved,
    tokensSaved: Math.max(roughTokenCount(input) - roughTokenCount(output), 0),
    bytesIn: byteSize(input),
    bytesOut: byteSize(output),
  }
}
|
|
90
|
+
|
|
91
|
+
/**
 * Estimates a token count by splitting on whitespace and common punctuation
 * and counting the non-empty pieces. This is a word-like approximation, not
 * the output of a real tokenizer.
 *
 * @param {*} value - Text to measure. `null`/`undefined` count as empty text
 *   (instead of the words "null"/"undefined"); other values are coerced with
 *   `String()`.
 * @returns {number} Number of non-empty pieces.
 */
function roughTokenCount(value) {
  const text = String(value ?? '').trim()
  const pieces = text.split(/[\s{}[\]():;,.'"`<>/\\|+=*-]+/)
  return pieces.filter(Boolean).length
}
|
|
97
|
+
|
|
98
|
+
/**
 * Measures the UTF-8 encoded size of a value's string form.
 *
 * @param {*} value - Text to measure. `null`/`undefined` count as empty text
 *   (instead of the 4/9 bytes of "null"/"undefined"); other values are
 *   coerced with `String()`.
 * @returns {number} Size in bytes.
 */
function byteSize(value) {
  const text = String(value ?? '')
  if (typeof TextEncoder !== 'undefined') return new TextEncoder().encode(text).length
  // Fallback for runtimes without a global TextEncoder.
  return Buffer.byteLength(text, 'utf8')
}
|
|
102
|
+
|
|
103
|
+
/**
 * Applies `pattern` to `value` once, using a copy of the pattern without the
 * global flag so only the first occurrence is replaced.
 *
 * @param {*} value - Text to rewrite; falsy values are treated as ''.
 * @param {RegExp} pattern - Pattern whose source and remaining flags are reused.
 * @param {string} replacement - Replacement text (may use `$1`-style references).
 * @returns {string} The rewritten text.
 */
function replaceSingle(value, pattern, replacement) {
  const nonGlobalFlags = pattern.flags.replace('g', '')
  const text = String(value || '')
  const firstMatchOnly = new RegExp(pattern.source, nonGlobalFlags)
  return text.replace(firstMatchOnly, replacement)
}
|
|
107
|
+
|
|
108
|
+
/**
 * Removes `//` line comments and block comments from text, leaving
 * comment-like sequences inside quoted strings and backtick spans alone.
 * Triple backticks are copied through as fence markers without opening a
 * quoted span, so fenced content is scanned like any other text.
 *
 * A `//` is treated as a comment only where `isLineCommentStart` accepts the
 * text emitted so far, which keeps sequences such as `http://` intact.
 *
 * Single- and double-quoted strings end at their matching quote or at an
 * unescaped line break; backtick spans may cover several lines. Without the
 * line-break rule a lone apostrophe in prose ("don't") would keep the scanner
 * in "string" mode and disable comment removal for everything that follows.
 *
 * @param {string} input - Text to scan.
 * @returns {{ output: string, commentsRemoved: number }} Text without
 *   comments and the number of comments removed. The line break that ends a
 *   line comment is kept.
 */
function stripSlashComments(input) {
  let output = ''
  let commentsRemoved = 0
  let quote = ''
  let escaped = false

  for (let i = 0; i < input.length; i += 1) {
    const ch = input[i]
    const next = input[i + 1]

    if (quote) {
      output += ch
      if (escaped) {
        escaped = false
      } else if (ch === '\\') {
        escaped = true
      } else if (ch === quote) {
        quote = ''
      } else if (quote !== '`' && (ch === '\n' || ch === '\r')) {
        // Unterminated '…' or "…": stop treating the following lines as string content.
        quote = ''
      }
      continue
    }

    if (ch === '`' && next === '`' && input[i + 2] === '`') {
      output += '```'
      i += 2
      continue
    }

    if (ch === '"' || ch === "'" || ch === '`') {
      quote = ch
      output += ch
      continue
    }

    if (ch === '/' && next === '/' && isLineCommentStart(output)) {
      commentsRemoved += 1
      while (i < input.length && input[i] !== '\n' && input[i] !== '\r') i += 1
      // Keep the line terminator (nothing to keep at end of input).
      output += input[i] || ''
      continue
    }

    if (ch === '/' && next === '*') {
      commentsRemoved += 1
      const closeAt = input.indexOf('*/', i + 2)
      // Park `i` on the final '/' of the comment; the loop increment steps past it.
      i = closeAt === -1 ? input.length : closeAt + 1
      continue
    }

    output += ch
  }

  return { output, commentsRemoved }
}
|
|
162
|
+
|
|
163
|
+
/**
 * Decides whether a `//` found right after `output` begins a line comment.
 *
 * Trailing spaces and tabs are ignored; the `//` counts as a comment only at
 * the very start of the text or directly after a line break, `;`, `{` or `}`.
 *
 * Scans backwards from the end instead of copying the accumulated text, so
 * text with many `//` sequences is not rescanned in full for each of them.
 *
 * @param {string} output - Text emitted so far by the comment stripper.
 * @returns {boolean} True when a `//` at this position should be stripped.
 */
function isLineCommentStart(output) {
  let last = output.length - 1
  while (last >= 0 && (output[last] === ' ' || output[last] === '\t')) last -= 1
  if (last < 0) return true

  const prev = output[last]
  return prev === '\n' || prev === '\r' || prev === ';' || prev === '{' || prev === '}'
}
|
|
168
|
+
|
|
169
|
+
/**
 * Reads a millisecond timestamp for measuring elapsed time.
 *
 * @returns {number} `performance.now()` when that API exists, otherwise
 *   `Date.now()`.
 */
function now() {
  if (typeof performance !== 'undefined' && performance.now) {
    return performance.now()
  }
  return Date.now()
}
|