@blanklogic/refinery-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,21 @@
1
+ # BlankLogic Refinery Core
2
+
3
+ Shared local engine package for BlankLogic Refinery.
4
+
5
+ ```bash
6
+ npm install @blanklogic/refinery-core
7
+ ```
8
+
9
+ ## API
10
+
11
+ ```js
12
+ import { refineFull, runCodeDebloater, runTokenSanitizer } from '@blanklogic/refinery-core'
13
+
14
+ const { output, stats } = refineFull(input)
15
+ ```
16
+
17
+ - `runCodeDebloater(input)` removes tracker noise, comments, and boilerplate while preserving useful structure.
18
+ - `runTokenSanitizer(input)` redacts obvious secrets and compacts text for LLM context windows.
19
+ - `refineFull(input)` runs de-bloat first, then secret sanitization, and returns `{ output, stats }`.
20
+
21
+ This package is local-only. It does not call Stripe, BlankLogic servers, Ollama, or third-party APIs.
package/package.json ADDED
@@ -0,0 +1,32 @@
1
+ {
2
+ "name": "@blanklogic/refinery-core",
3
+ "version": "0.1.0",
4
+ "description": "Shared BlankLogic Refinery context hygiene engines.",
5
+ "type": "module",
6
+ "private": false,
7
+ "exports": {
8
+ ".": "./src/index.js"
9
+ },
10
+ "files": [
11
+ "src"
12
+ ],
13
+ "publishConfig": {
14
+ "access": "public"
15
+ },
16
+ "license": "UNLICENSED",
17
+ "repository": {
18
+ "type": "git",
19
+ "url": "git+https://github.com/briancrabtree-me/blanklogic-site.git",
20
+ "directory": "packages/core"
21
+ },
22
+ "keywords": [
23
+ "blanklogic",
24
+ "refinery",
25
+ "llm",
26
+ "context",
27
+ "sanitizer"
28
+ ],
29
+ "engines": {
30
+ "node": ">=20"
31
+ }
32
+ }
package/src/index.js ADDED
@@ -0,0 +1,36 @@
1
+ import { runCodeDebloater, roughTokenCount, byteSize, lineCount } from './refineryCompiler.js'
2
+ import { runTokenSanitizer } from './refinerySanitizer.js'
3
+
4
+ export { runCodeDebloater, roughTokenCount, byteSize, lineCount, runTokenSanitizer }
5
+
6
/**
 * Runs the full pipeline: de-bloat first, then secret sanitization.
 *
 * @param {*} inputValue - Raw text; null/undefined are treated as an empty string.
 * @returns {{output: string, stats: object}} The sanitized text plus counters
 *   gathered from both passes (sizes, rough token counts, line counts, removals).
 */
export function refineFull(inputValue) {
  const source = String(inputValue ?? '')
  const debloatPass = runCodeDebloater(source)
  const sanitizePass = runTokenSanitizer(debloatPass.output)
  const finalText = sanitizePass.output

  const bytesIn = byteSize(source)
  const bytesOut = byteSize(finalText)
  const tokensBefore = roughTokenCount(source)
  const tokensAfter = roughTokenCount(finalText)

  // Percentage of bytes removed; output that grew is reported as 0, never negative.
  let reduction = 0
  if (bytesIn > 0) {
    reduction = Math.max(0, Math.round((1 - bytesOut / bytesIn) * 100))
  }

  const stats = {
    bytesIn,
    bytesOut,
    tokensBefore,
    tokensAfter,
    tokensSaved: Math.max(tokensBefore - tokensAfter, 0),
    linesBefore: lineCount(source),
    linesAfter: lineCount(finalText),
    reduction,
    trackersRemoved: debloatPass.trackersRemoved,
    htmlCommentsRemoved: debloatPass.htmlCommentsRemoved,
    scriptCommentsRemoved: debloatPass.scriptCommentsRemoved,
    styleCommentsRemoved: debloatPass.styleCommentsRemoved,
    secretsRedacted: sanitizePass.secretsRedacted,
    commentsRemoved: sanitizePass.commentsRemoved,
    domSafe: debloatPass.domSafe,
    // Sum of the time each pass reported for itself.
    ms: debloatPass.ms + sanitizePass.ms,
  }

  return { output: finalText, stats }
}
@@ -0,0 +1,263 @@
1
// Matches one whole <script>…</script> or <noscript>…</noscript> element.
// The body is matched lazily and the closing tag must repeat the opening name.
const TRACKER_BLOCK_RE = /<(script|noscript)\b[^>]*>[\s\S]*?<\/\1>/gi

// Vendor keywords: a script/noscript element containing any of these
// (case-insensitively) is treated as a tracker and dropped.
const TRACKER_VENDOR_RE = /google-analytics|googletagmanager|gtag|hotjar|segment|mixpanel|facebook|fbq|clarity/i

// Elements that are lifted out of the markup before whitespace compaction
// so their bodies are not treated as ordinary text nodes.
const RAW_BLOCK_RE = /<(script|style|pre|code|textarea)\b[^>]*>[\s\S]*?<\/\1>/gi
7
+
8
/**
 * Strips tracker elements, comments and redundant whitespace from markup or loose code.
 *
 * Line endings are normalized to "\n" first. Input that contains something
 * tag-shaped is compacted as HTML; everything else is compacted as loose code.
 *
 * @param {*} inputValue - Raw text; null/undefined are treated as an empty string.
 * @returns {object} The trimmed `output` plus timing, size, line and removal counters.
 *   `domSafe` reports whether the HTML path was taken.
 */
export function runCodeDebloater(inputValue) {
  const startedAt = now()
  const input = String(inputValue ?? '')
  const unified = input.replace(/\r\n?/g, '\n')
  const htmlLike = /<\/?[a-z][\s\S]*>/i.test(unified)

  let result
  if (htmlLike) {
    result = compactHtml(unified)
  } else {
    result = compactLooseCode(unified)
  }

  const output = result.output.trim()
  const counters = result.stats

  return {
    output,
    // Measured before the size statistics below are computed.
    ms: now() - startedAt,
    reduction: reductionPercent(input, output),
    tokensSaved: Math.max(roughTokenCount(input) - roughTokenCount(output), 0),
    bytesIn: byteSize(input),
    bytesOut: byteSize(output),
    linesBefore: lineCount(input),
    linesAfter: lineCount(output),
    trackersRemoved: counters.trackersRemoved,
    htmlCommentsRemoved: counters.htmlCommentsRemoved,
    scriptCommentsRemoved: counters.scriptCommentsRemoved,
    styleCommentsRemoved: counters.styleCommentsRemoved,
    domSafe: htmlLike,
  }
}
32
+
33
/**
 * Compacts HTML-like text.
 *
 * Steps, in order: drop script/noscript elements that mention a tracker vendor;
 * park script/style/pre/code/textarea elements behind NUL-delimited placeholders
 * (stripping comments inside script and style bodies); remove HTML comments;
 * collapse whitespace in tags and text nodes; finally restore the parked elements.
 *
 * @param {string} input - Text with line endings already normalized by the caller.
 * @returns {{output: string, stats: object}} Compacted markup and removal counters.
 */
function compactHtml(input) {
  const stats = emptyStats()
  const stash = []

  const withoutTrackers = input.replace(TRACKER_BLOCK_RE, (element) => {
    if (TRACKER_VENDOR_RE.test(element)) {
      stats.trackersRemoved += 1
      return ''
    }
    return element
  })

  const withPlaceholders = withoutTrackers.replace(RAW_BLOCK_RE, (element, tagName) => {
    let kept = element
    switch (String(tagName).toLowerCase()) {
      case 'script': {
        const cleaned = processScriptBlock(element)
        stats.scriptCommentsRemoved += cleaned.commentsRemoved
        kept = cleaned.output
        break
      }
      case 'style': {
        const cleaned = processStyleBlock(element)
        stats.styleCommentsRemoved += cleaned.commentsRemoved
        kept = cleaned.output
        break
      }
      default:
        // pre / code / textarea are parked untouched.
        break
    }
    const placeholder = `\u0000REFINERY_BLOCK_${stash.length}\u0000`
    stash.push(kept)
    return placeholder
  })

  const withoutComments = withPlaceholders.replace(/<!--[\s\S]*?-->/g, () => {
    stats.htmlCommentsRemoved += 1
    return ''
  })

  // The capture group keeps tags and placeholders in the split result.
  const pieces = withoutComments.split(/(<[^>]+>|\u0000REFINERY_BLOCK_\d+\u0000)/g)
  const compacted = pieces
    .map((piece) => {
      if (!piece) return ''
      if (piece.startsWith('\u0000REFINERY_BLOCK_')) return piece
      if (piece.startsWith('<')) return piece.replace(/\s{2,}/g, ' ').trim()
      return compactTextNode(piece)
    })
    .join('')

  const tightened = compacted.replace(/>\s+</g, '><').replace(/\n{3,}/g, '\n\n')

  const output = tightened.replace(
    /\u0000REFINERY_BLOCK_(\d+)\u0000/g,
    (_whole, index) => stash[Number(index)] || ''
  )
  return { output, stats }
}
79
+
80
/**
 * Compacts text that was not recognized as HTML.
 *
 * Removes HTML comments, then JS-style comments, then CSS-style comments,
 * and finally collapses runs of spaces/tabs and runs of blank lines.
 *
 * @param {string} input - Text with line endings already normalized by the caller.
 * @returns {{output: string, stats: object}} Compacted text and removal counters.
 */
function compactLooseCode(input) {
  const stats = emptyStats()

  const commentFree = input.replace(/<!--[\s\S]*?-->/g, () => {
    stats.htmlCommentsRemoved += 1
    return ''
  })

  const jsPass = stripJsComments(commentFree)
  const cssPass = stripCssComments(jsPass.output)
  stats.scriptCommentsRemoved = jsPass.commentsRemoved
  stats.styleCommentsRemoved = cssPass.commentsRemoved

  let output = cssPass.output.replace(/[ \t]{2,}/g, ' ')
  output = output.replace(/\n{3,}/g, '\n\n')
  output = output.trim()

  return { output, stats }
}
98
+
99
/**
 * Strips JS comments from the body of a <script> element, keeping its tags.
 *
 * @param {string} block - A complete script element, opening tag through closing tag.
 * @returns {{output: string, commentsRemoved: number}} Rebuilt element (body trimmed) and comment count.
 */
function processScriptBlock(block) {
  const { open, body, close } = splitRawBlock(block)
  const { output: code, commentsRemoved } = stripJsComments(body)
  return {
    output: open + code.trim() + close,
    commentsRemoved,
  }
}
107
+
108
/**
 * Strips CSS comments from the body of a <style> element, keeping its tags.
 *
 * @param {string} block - A complete style element, opening tag through closing tag.
 * @returns {{output: string, commentsRemoved: number}} Rebuilt element (body trimmed) and comment count.
 */
function processStyleBlock(block) {
  const { open, body, close } = splitRawBlock(block)
  const { output: css, commentsRemoved } = stripCssComments(body)
  return {
    output: open + css.trim() + close,
    commentsRemoved,
  }
}
116
+
117
/**
 * Splits an element into its opening tag, body and closing tag.
 *
 * @param {string} block - Element text.
 * @returns {{open: string, body: string, close: string}} `open` is the leading tag
 *   (or '' when the text does not start with one); `close` is the trailing closing
 *   tag including any whitespace after it (or ''); `body` is everything between.
 */
function splitRawBlock(block) {
  const [open = ''] = block.match(/^<[^>]+>/) ?? []
  const [close = ''] = block.match(/<\/[a-z]+>\s*$/i) ?? []
  const bodyEnd = block.length - close.length
  return { open, body: block.slice(open.length, bodyEnd), close }
}
125
+
126
/**
 * Removes block comments from CSS-like text while leaving quoted strings intact.
 *
 * A stray quote character (for example the apostrophe in `O'Reilly`) only
 * protects text up to the end of its line: CSS strings cannot contain an
 * unescaped newline, so string mode ends at a line break instead of hiding
 * every later comment.
 *
 * @param {string} input - CSS-like text.
 * @returns {{output: string, commentsRemoved: number}} Text without comments and the
 *   number of comments removed (an unterminated comment counts and runs to the end).
 */
function stripCssComments(input) {
  let output = ''
  let commentsRemoved = 0
  let quote = ''
  let escaped = false

  for (let i = 0; i < input.length; i += 1) {
    const ch = input[i]

    if (quote) {
      output += ch
      if (escaped) {
        escaped = false
      } else if (ch === '\\') {
        escaped = true
      } else if (ch === quote || ch === '\n' || ch === '\r') {
        // Closing quote, or an unescaped line break that ends an unterminated string.
        quote = ''
      }
      continue
    }

    if (ch === '"' || ch === "'") {
      quote = ch
      output += ch
      continue
    }

    if (ch === '/' && input[i + 1] === '*') {
      commentsRemoved += 1
      // Search starts after the opener so that "/*/" is not treated as closed.
      const end = input.indexOf('*/', i + 2)
      // Land on the final "/" of the comment; the loop increment steps past it.
      i = end === -1 ? input.length : end + 1
      continue
    }

    output += ch
  }

  return { output, commentsRemoved }
}
167
+
168
/**
 * Removes JS block comments and statement-leading line comments while leaving
 * quoted strings and template literals intact.
 *
 * Single- and double-quoted strings cannot span lines in JavaScript, so an
 * unescaped line break ends string mode. Without that, one stray quote (a regex
 * literal such as /'/ or an apostrophe in text) would shield every later
 * comment from removal. Template literals may span lines and keep doing so.
 *
 * @param {string} input - JS-like text.
 * @returns {{output: string, commentsRemoved: number}} Text without the removed
 *   comments, and how many were removed.
 */
function stripJsComments(input) {
  let output = ''
  let commentsRemoved = 0
  let quote = ''
  let escaped = false

  for (let i = 0; i < input.length; i += 1) {
    const ch = input[i]
    const next = input[i + 1]

    if (quote) {
      output += ch
      if (escaped) {
        escaped = false
      } else if (ch === '\\') {
        escaped = true
      } else if (ch === quote) {
        quote = ''
      } else if (quote !== '`' && (ch === '\n' || ch === '\r')) {
        // Unterminated '...' or "...": stop protecting text at the line break.
        quote = ''
      }
      continue
    }

    if (ch === '"' || ch === "'" || ch === '`') {
      quote = ch
      output += ch
      continue
    }

    if (ch === '/' && next === '/' && isSlashCommentStart(output)) {
      commentsRemoved += 1
      while (i < input.length && input[i] !== '\n' && input[i] !== '\r') i += 1
      // Keep the line break that terminated the comment (nothing at end of input).
      output += input[i] || ''
      continue
    }

    if (ch === '/' && next === '*') {
      commentsRemoved += 1
      i += 2
      while (i < input.length && !(input[i] === '*' && input[i + 1] === '/')) i += 1
      // Step onto the closing "/"; the loop increment moves past it.
      i += 1
      continue
    }

    output += ch
  }

  return { output, commentsRemoved }
}

/**
 * Decides whether a "//" at the current position starts a line comment.
 *
 * It does only when the last non-blank character already emitted is nothing,
 * a line break, ";", "{" or "}". That keeps "//" inside URLs such as
 * "http://…" from being treated as a comment.
 *
 * @param {string} output - Text emitted so far.
 * @returns {boolean} True when the "//" should be treated as a comment start.
 */
function isSlashCommentStart(output) {
  // Walk back over trailing spaces/tabs without copying the whole buffer.
  let end = output.length
  while (end > 0 && (output[end - 1] === ' ' || output[end - 1] === '\t')) end -= 1
  if (end === 0) return true
  const prev = output[end - 1]
  return prev === '\n' || prev === '\r' || prev === ';' || prev === '{' || prev === '}'
}
222
+
223
/**
 * Collapses whitespace inside an HTML text node.
 *
 * @param {string} text - Text found between tags.
 * @returns {string} Text with runs of spaces/tabs reduced to one space and each
 *   line break (with its surrounding whitespace) replaced by a single space.
 */
function compactTextNode(text) {
  const singleSpaced = text.replace(/[ \t]{2,}/g, ' ')
  return singleSpaced.replace(/\s*\n\s*/g, ' ')
}
228
+
229
/**
 * Creates a fresh set of removal counters, all starting at zero.
 *
 * @returns {{trackersRemoved: number, htmlCommentsRemoved: number, scriptCommentsRemoved: number, styleCommentsRemoved: number}}
 *   A new object on every call, so callers can mutate it freely.
 */
function emptyStats() {
  const counterNames = [
    'trackersRemoved',
    'htmlCommentsRemoved',
    'scriptCommentsRemoved',
    'styleCommentsRemoved',
  ]
  return Object.fromEntries(counterNames.map((name) => [name, 0]))
}
237
+
238
/**
 * Estimates a token count by splitting on whitespace and common punctuation.
 *
 * This is a rough heuristic, not a model tokenizer.
 *
 * @param {*} value - Text to measure; null/undefined count as empty text
 *   (0 tokens) rather than as the words "null"/"undefined".
 * @returns {number} Number of non-empty fragments.
 */
export function roughTokenCount(value) {
  return String(value ?? '')
    .trim()
    .split(/[\s{}[\]():;,.'"`<>/\\|+=*-]+/)
    .filter(Boolean).length
}
244
+
245
/**
 * Returns the UTF-8 encoded size of a value's string form, in bytes.
 *
 * @param {*} value - Text to measure; null/undefined count as empty text
 *   (0 bytes) rather than as the words "null"/"undefined".
 * @returns {number} Byte length in UTF-8.
 */
export function byteSize(value) {
  const text = String(value ?? '')
  // TextEncoder is preferred; Buffer is the fallback where it is not defined.
  if (typeof TextEncoder !== 'undefined') return new TextEncoder().encode(text).length
  return Buffer.byteLength(text, 'utf8')
}
249
+
250
/**
 * Counts lines, treating "\r\n", "\r" and "\n" as line breaks.
 *
 * @param {*} value - Text to measure; only null/undefined count as empty text,
 *   so falsy values such as 0 are measured by their string form ("0" is one line).
 * @returns {number} 0 for empty text, otherwise the number of line breaks plus one.
 */
export function lineCount(value) {
  const text = String(value ?? '')
  return text.length ? text.replace(/\r\n?/g, '\n').split('\n').length : 0
}
254
+
255
/**
 * Computes how much smaller `output` is than `input`, as a whole-number percentage.
 *
 * @param {string} input - Text before processing.
 * @param {string} output - Text after processing.
 * @returns {number} Rounded percentage of bytes removed; 0 when the input is
 *   empty or the output is not smaller.
 */
function reductionPercent(input, output) {
  const before = byteSize(input)
  const after = byteSize(output)
  if (before <= 0) return 0
  const percent = Math.round((1 - after / before) * 100)
  return Math.max(0, percent)
}
260
+
261
/**
 * Returns a timestamp in milliseconds for measuring elapsed time.
 *
 * @returns {number} `performance.now()` when available, otherwise `Date.now()`.
 */
function now() {
  const hasPerformanceClock = typeof performance !== 'undefined' && performance.now
  if (hasPerformanceClock) return performance.now()
  return Date.now()
}
@@ -0,0 +1,171 @@
1
// Ordered list of secret shapes to redact. Each entry pairs a global regex with
// its replacement text; "$1" in a replacement refers to the entry's first capture group.
const SECRET_PATTERNS = [
  {
    // "sk-" prefixed keys. The label is kept as-is so existing output stays stable.
    pattern: /\bsk-[A-Za-z0-9_-]{12,}\b/g,
    replacement: '[REDACTED_STRIPE_SECRET]',
  },
  {
    // Stripe secret and restricted keys use underscores (sk_live_…, sk_test_…,
    // rk_live_…, rk_test_…), which the "sk-" pattern above does not match.
    pattern: /\b(?:sk|rk)_(?:live|test)_[A-Za-z0-9]{12,}\b/g,
    replacement: '[REDACTED_STRIPE_SECRET]',
  },
  {
    pattern: /\bpk_(live|test)_[A-Za-z0-9]{12,}\b/g,
    replacement: '[REDACTED_STRIPE_PUBLIC]',
  },
  {
    pattern: /\bAIza[0-9A-Za-z_-]{20,}\b/g,
    replacement: '[REDACTED_GOOGLE_API_KEY]',
  },
  {
    pattern: /\b(?:ghp|github_pat|gho|ghu|ghs)_[A-Za-z0-9_]{20,}\b/g,
    replacement: '[REDACTED_GITHUB_TOKEN]',
  },
  {
    // Three dot-separated base64url-looking segments.
    pattern: /\b[A-Za-z0-9_-]{24,}\.[A-Za-z0-9_-]{12,}\.[A-Za-z0-9_-]{20,}\b/g,
    replacement: '[REDACTED_JWT]',
  },
  {
    // Keeps the header name; skips values that already start with "[REDACTED".
    pattern: /\b(Authorization\s*:\s*Bearer\s+)(?!\[REDACTED)[^\s,;&]+/gi,
    replacement: '$1[REDACTED_BEARER]',
  },
  {
    // Any name containing api_key / token / secret / password followed by ":" or "=".
    // Skips values that already start with "[REDACTED".
    pattern: /\b([A-Za-z0-9_-]*(?:api[_-]?key|token|secret|password)[A-Za-z0-9_-]*)\s*[:=]\s*["']?(?!\[REDACTED)[^"'\s,;&]+["']?/gi,
    replacement: '$1=[REDACTED]',
  },
]

// Secret-named assignments at the start of the text or after "{", whitespace, ","
// or ";". Capture groups, in order: prefix, name plus separator, opening quote,
// value, closing quote.
const NAMED_SECRET_ASSIGNMENT_RE =
  /(^|[{\s,;])(["']?[A-Za-z0-9_-]*(?:api[_-]?key|token|secret|password)[A-Za-z0-9_-]*["']?\s*[:=]\s*)(["']?)(?!\[REDACTED\])([^"'\s,;&}]+)(["']?)/gi
34
+
35
/**
 * Redacts secrets, removes comments and squeezes text onto a single line.
 *
 * Order of work: normalize line endings; redact secret-named assignments;
 * apply every entry of SECRET_PATTERNS; drop triple-quoted blocks, HTML
 * comments and "#" line comments (hex-colour-like "#abc" is left alone); drop
 * slash comments; then trim each line, join the lines with spaces and remove
 * whitespace around common punctuation.
 *
 * @param {*} inputValue - Raw text; null/undefined are treated as an empty string.
 * @returns {object} The compacted `output` plus timing, redaction, comment,
 *   token and byte counters.
 */
export function runTokenSanitizer(inputValue) {
  const startedAt = now()
  const input = String(inputValue ?? '')
  let secretsRedacted = 0
  let commentsRemoved = 0

  let text = input.replace(/\r\n?/g, '\n')

  // Named assignments first, reusing the opening quote on both sides of the marker.
  text = text.replace(NAMED_SECRET_ASSIGNMENT_RE, (_whole, lead, assignment, quote) => {
    secretsRedacted += 1
    return `${lead}${assignment}${quote}[REDACTED]${quote}`
  })

  for (const { pattern, replacement } of SECRET_PATTERNS) {
    text = text.replace(pattern, (hit) => {
      secretsRedacted += 1
      return replaceSingle(hit, pattern, replacement)
    })
  }

  const dropAndCount = () => {
    commentsRemoved += 1
    return ''
  }
  text = text.replace(/("""|''')[\s\S]*?\1/g, dropAndCount)
  text = text.replace(/<!--[\s\S]*?-->/g, dropAndCount)
  text = text.replace(/(^|\n)[ \t]*#(?![A-Fa-f0-9]{3,8}\b)[^\n\r]*/g, (_line, lead) => {
    commentsRemoved += 1
    // Keep the line break (if any) that preceded the comment.
    return lead
  })

  const slashPass = stripSlashComments(text)
  commentsRemoved += slashPass.commentsRemoved

  const fencesTightened = slashPass.output.replace(/```[\s\S]*?```/g, (fence) =>
    fence.replace(/\n{2,}/g, '\n')
  )
  const singleLine = fencesTightened
    .split('\n')
    .map((line) => line.trim())
    .filter(Boolean)
    .join(' ')
  const output = singleLine
    .replace(/\s{2,}/g, ' ')
    .replace(/\s*([{}[\]():;,=+\-*<>|])\s*/g, '$1')
    .trim()

  return {
    output,
    ms: now() - startedAt,
    secretsRedacted,
    commentsRemoved,
    tokensSaved: Math.max(roughTokenCount(input) - roughTokenCount(output), 0),
    bytesIn: byteSize(input),
    bytesOut: byteSize(output),
  }
}
90
+
91
/**
 * Estimates a token count by splitting on whitespace and common punctuation.
 *
 * @param {*} value - Value whose string form is measured.
 * @returns {number} Number of non-empty fragments.
 */
function roughTokenCount(value) {
  const fragments = String(value)
    .trim()
    .split(/[\s{}[\]():;,.'"`<>/\\|+=*-]+/)
  let count = 0
  for (const fragment of fragments) {
    if (fragment) count += 1
  }
  return count
}
97
+
98
/**
 * Returns the UTF-8 encoded size of a value's string form, in bytes.
 *
 * @param {*} value - Value whose string form is measured.
 * @returns {number} Byte length in UTF-8.
 */
function byteSize(value) {
  const text = String(value)
  return typeof TextEncoder === 'undefined'
    ? Buffer.byteLength(text, 'utf8')
    : new TextEncoder().encode(text).length
}
102
+
103
/**
 * Applies `replacement` to the first match of `pattern` in `value`.
 *
 * The pattern is rebuilt without its "g" flag, so "$1"-style references in
 * `replacement` are expanded for exactly one match.
 *
 * @param {*} value - Text to rewrite; falsy values are treated as ''.
 * @param {RegExp} pattern - Pattern to apply once.
 * @param {string} replacement - Replacement text, may contain "$n" references.
 * @returns {string} The rewritten text.
 */
function replaceSingle(value, pattern, replacement) {
  const text = String(value || '')
  const once = new RegExp(pattern.source, pattern.flags.replace('g', ''))
  return text.replace(once, replacement)
}
107
+
108
/**
 * Removes block comments and statement-leading line comments while leaving
 * quoted strings, template literals and ``` fence markers intact.
 *
 * Single- and double-quoted strings end at an unescaped line break. Without
 * that, one stray apostrophe in text (for example "it's") would shield every
 * later comment from removal. Backtick strings may still span lines.
 *
 * @param {string} input - Text to scan.
 * @returns {{output: string, commentsRemoved: number}} Text without the removed
 *   comments, and how many were removed.
 */
function stripSlashComments(input) {
  let output = ''
  let commentsRemoved = 0
  let quote = ''
  let escaped = false

  for (let i = 0; i < input.length; i += 1) {
    const ch = input[i]
    const next = input[i + 1]

    if (quote) {
      output += ch
      if (escaped) {
        escaped = false
      } else if (ch === '\\') {
        escaped = true
      } else if (ch === quote) {
        quote = ''
      } else if (quote !== '`' && (ch === '\n' || ch === '\r')) {
        // Unterminated '...' or "...": stop protecting text at the line break.
        quote = ''
      }
      continue
    }

    // A ``` fence marker is copied through without opening a backtick string.
    if (ch === '`' && next === '`' && input[i + 2] === '`') {
      output += '```'
      i += 2
      continue
    }

    if (ch === '"' || ch === "'" || ch === '`') {
      quote = ch
      output += ch
      continue
    }

    if (ch === '/' && next === '/' && isLineCommentStart(output)) {
      commentsRemoved += 1
      while (i < input.length && input[i] !== '\n' && input[i] !== '\r') i += 1
      // Keep the line break that terminated the comment (nothing at end of input).
      output += input[i] || ''
      continue
    }

    if (ch === '/' && next === '*') {
      commentsRemoved += 1
      i += 2
      while (i < input.length && !(input[i] === '*' && input[i + 1] === '/')) i += 1
      // Step onto the closing "/"; the loop increment moves past it.
      i += 1
      continue
    }

    output += ch
  }

  return { output, commentsRemoved }
}

/**
 * Decides whether a "//" at the current position starts a line comment.
 *
 * It does only when the last non-blank character already emitted is nothing,
 * a line break, ";", "{" or "}". That keeps "//" inside URLs such as
 * "http://…" from being treated as a comment.
 *
 * @param {string} output - Text emitted so far.
 * @returns {boolean} True when the "//" should be treated as a comment start.
 */
function isLineCommentStart(output) {
  // Walk back over trailing spaces/tabs without copying the whole buffer.
  let end = output.length
  while (end > 0 && (output[end - 1] === ' ' || output[end - 1] === '\t')) end -= 1
  if (end === 0) return true
  const prev = output[end - 1]
  return prev === '\n' || prev === '\r' || prev === ';' || prev === '{' || prev === '}'
}
168
+
169
/**
 * Returns a timestamp in milliseconds for measuring elapsed time.
 *
 * @returns {number} `performance.now()` when available, otherwise `Date.now()`.
 */
function now() {
  if (typeof performance === 'undefined' || !performance.now) {
    return Date.now()
  }
  return performance.now()
}