agent-cache-optimizer 0.5.3 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/splitting.ts CHANGED
@@ -25,26 +25,30 @@ export function splitBlock(block: string, threshold = DEFAULT_SPLIT_THRESHOLD):
25
25
 
26
26
  const trimmed = block.trim()
27
27
 
28
- // ── JSON object array: {"name": "A", ...}, {"name": "B", ...} ──
28
+ // ── JSON: brace-depth parser ───────────────────────────────────
29
+ // Handles JSON arrays [{...}, {...}, ...] and consecutive objects
30
+ // without external dependencies or brittle regex.
31
+ if (trimmed.startsWith("[") && trimmed.endsWith("]")) {
32
+ const items = splitJSONItems(trimmed.slice(1, -1))
33
+ if (items && items.length >= 2) return items
34
+ }
29
35
  if (trimmed.startsWith("{")) {
30
- const objects = block.match(/\{[^}{]*"name"\s*:\s*"[^"]+"[^}]*\}/g)
31
- if (objects && objects.length >= 2) return objects
36
+ const items = splitJSONItems(trimmed)
37
+ if (items && items.length >= 2) return items
32
38
  }
33
39
 
34
- // ── Markdown: split at ## section headers ──────────────────────
35
- if (block.includes("\n## ")) {
36
- const sections = block.split(/\n(?=## )/)
37
- if (sections.length >= 2) return sections
38
- }
40
+ // ── Markdown: split at section headers outside fenced code ─────
41
+ const markdownSections = splitMarkdownSections(block)
42
+ if (markdownSections) return markdownSections
43
+
44
+ // ── Markdown: split long top-level lists ───────────────────────
45
+ const markdownListItems = splitMarkdownListItems(block)
46
+ if (markdownListItems) return markdownListItems
39
47
 
40
- // ── XML/HTML: split at top-level closing tags ──────────────────
48
+ // ── XML/HTML: split top-level sibling elements ─────────────────
41
49
  if (/^<(\w+)[^>]*>/.test(trimmed)) {
42
- const tagMatch = trimmed.match(/^<(\w+)[^>]*>/)
43
- if (tagMatch) {
44
- const tag = tagMatch[1]
45
- const parts = block.split(new RegExp(`(?=</?${tag}[>\\s])`))
46
- if (parts.length >= 2) return parts
47
- }
50
+ const parts = splitXMLTopLevelElements(trimmed)
51
+ if (parts) return parts
48
52
  }
49
53
 
50
54
  // ── Fallback: paragraph boundaries ─────────────────────────────
@@ -54,6 +58,142 @@ export function splitBlock(block: string, threshold = DEFAULT_SPLIT_THRESHOLD):
54
58
  return [block]
55
59
  }
56
60
 
61
/**
 * Lightweight brace-depth scanner that extracts the top-level JSON objects
 * from an array body or from a run of consecutive objects.
 *
 * Braces inside string literals are ignored and backslash escapes are
 * honoured, so arbitrary nesting and quoted `{` / `}` do not confuse the
 * scan. Only the `{...}` spans are returned; whatever sits between them
 * (whitespace, commas, or anything else) is not part of any item.
 *
 * @param text - Array body (without the surrounding brackets) or a sequence
 *   of objects.
 * @returns The object substrings in source order, or `null` when fewer than
 *   two complete objects are found or when the braces are unbalanced.
 */
function splitJSONItems(text: string): string[] | null {
  const items: string[] = []
  let depth = 0
  let start = -1
  let inString = false

  for (let i = 0; i < text.length; i++) {
    const ch = text[i]

    if (inString) {
      if (ch === "\\") i++ // the escaped character can never terminate the string
      else if (ch === '"') inString = false
      continue
    }

    if (ch === '"') {
      inString = true
    } else if (ch === "{") {
      if (depth === 0) start = i
      depth++
    } else if (ch === "}") {
      // A closer with nothing open means the input is malformed. Letting the
      // depth go negative would make every later object invisible, so refuse
      // to split instead of returning a partial result.
      if (depth === 0) return null
      depth--
      if (depth === 0) {
        items.push(text.slice(start, i + 1))
        start = -1
      }
    }
  }

  // An object that never closes would otherwise be dropped without notice.
  if (depth !== 0) return null

  return items.length >= 2 ? items : null
}
98
+
99
/**
 * Split a Markdown block into sections at ATX headings (`#`, `##`, `###`)
 * that sit outside fenced code blocks.
 *
 * The shallowest heading level that occurs at least twice is used as the
 * split level; other headings stay inside the section that contains them.
 * Text before the first split heading becomes its own leading section.
 *
 * @param block - Markdown text to split.
 * @returns The sections with trailing whitespace removed and empty sections
 *   discarded, or `null` when no heading level occurs twice or fewer than two
 *   non-empty sections remain.
 */
function splitMarkdownSections(block: string): string[] | null {
  const fenceOpenRe = /^\s*(`{3,}|~{3,})/
  const fenceCloseRe = /^\s*(`{3,}|~{3,})\s*$/
  const headingRe = /^(#{1,3})\s+\S/

  const lines = block.split("\n")
  const candidates: Record<1 | 2 | 3, number[]> = { 1: [], 2: [], 3: [] }
  // Marker run (e.g. "```" or "~~~~") of the fence currently open, if any.
  let openFence: string | null = null

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? ""

    if (openFence !== null) {
      // Only a bare fence of the same character, at least as long as the
      // opener, closes the block. A "~~~" line inside a "```" fence, or a
      // fence line carrying an info string, is ordinary code content.
      const closer = fenceCloseRe.exec(line)?.[1]
      if (
        closer !== undefined &&
        closer.charAt(0) === openFence.charAt(0) &&
        closer.length >= openFence.length
      ) {
        openFence = null
      }
      continue
    }

    const opener = fenceOpenRe.exec(line)?.[1]
    if (opener !== undefined) {
      openFence = opener
      continue
    }

    const level = headingRe.exec(line)?.[1]?.length
    if (level === 1 || level === 2 || level === 3) candidates[level].push(i)
  }

  const level = ([1, 2, 3] as const).find((candidate) => candidates[candidate].length >= 2)
  if (level === undefined) return null

  const starts = candidates[level]
  const firstStart = starts[0]
  if (firstStart === undefined) return null

  const sections: string[] = []
  // Keep whatever precedes the first heading so no content is lost.
  if (firstStart !== 0) sections.push(lines.slice(0, firstStart).join("\n").trimEnd())
  starts.forEach((start, idx) => {
    const end = starts[idx + 1] ?? lines.length
    sections.push(lines.slice(start, end).join("\n").trimEnd())
  })

  const filtered = sections.filter((section) => section.trim().length > 0)
  return filtered.length >= 2 ? filtered : null
}
135
+
136
/**
 * Split a Markdown block at its top-level list items.
 *
 * An item starts on a line that begins at column 0 with a bullet (`-`, `*`,
 * `+`) or an ordered marker (`1.` / `1)`) followed by whitespace and text.
 * Lines inside fenced code blocks are never treated as item starts.
 * Each item runs up to the next item start, so indented continuation lines
 * and nested items stay attached to their parent. Text before the first item
 * is kept as a leading part.
 *
 * @param block - Markdown text to split.
 * @returns The optional non-empty preamble followed by the items (each with
 *   trailing whitespace removed), or `null` when fewer than three top-level
 *   items are found.
 */
function splitMarkdownListItems(block: string): string[] | null {
  const fenceOpenRe = /^\s*(`{3,}|~{3,})/
  const fenceCloseRe = /^\s*(`{3,}|~{3,})\s*$/
  const itemRe = /^([-*+]|\d+[.)])\s+\S/

  const lines = block.split("\n")
  const starts: number[] = []
  // Marker run (e.g. "```" or "~~~~") of the fence currently open, if any.
  let openFence: string | null = null

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i] ?? ""

    if (openFence !== null) {
      // Only a bare fence of the same character, at least as long as the
      // opener, closes the block; anything else is code content.
      const closer = fenceCloseRe.exec(line)?.[1]
      if (
        closer !== undefined &&
        closer.charAt(0) === openFence.charAt(0) &&
        closer.length >= openFence.length
      ) {
        openFence = null
      }
      continue
    }

    const opener = fenceOpenRe.exec(line)?.[1]
    if (opener !== undefined) {
      openFence = opener
      continue
    }

    if (itemRe.test(line)) starts.push(i)
  }

  const firstStart = starts[0]
  if (firstStart === undefined || starts.length < 3) return null

  const parts: string[] = []

  // Text ahead of the list (e.g. an introductory sentence) must survive the
  // split; only a whitespace-only preamble is discarded.
  const preamble = lines.slice(0, firstStart).join("\n").trimEnd()
  if (preamble.trim().length > 0) parts.push(preamble)

  starts.forEach((start, idx) => {
    const end = starts[idx + 1] ?? lines.length
    parts.push(lines.slice(start, end).join("\n").trimEnd())
  })

  return parts
}
162
+
163
/**
 * Split XML/HTML-like text into its top-level sibling elements.
 *
 * Tags are scanned left to right while tracking nesting depth. Each span
 * from a depth-0 opening tag to its matching closing tag (or a depth-0
 * self-closing tag) becomes one item. Comments and CDATA sections are
 * treated as opaque, so tag-like text inside them does not affect the depth.
 * Text that lies outside every top-level element is not part of any item.
 *
 * @param text - Markup to split.
 * @returns The trimmed top-level elements in source order, or `null` when
 *   the tags are unbalanced or fewer than two top-level elements exist.
 */
function splitXMLTopLevelElements(text: string): string[] | null {
  const items: string[] = []
  // Alternatives, in order: comment, CDATA section, opening/closing tag.
  // The optional "/" before ">" lets "<item/>" (no space before the slash)
  // match as well as "<item />".
  const tokenRe = /<!--[\s\S]*?-->|<!\[CDATA\[[\s\S]*?\]\]>|<\/?[A-Za-z][\w:.-]*(?:\s[^<>]*)?\/?>/g
  let depth = 0
  let start = -1
  let match: RegExpExecArray | null

  while ((match = tokenRe.exec(text)) !== null) {
    const token = match[0]

    // Comment or CDATA: skip it whole so nothing inside is counted as a tag.
    if (token.startsWith("<!")) continue

    if (token.startsWith("</")) {
      depth--
      // A closing tag with nothing open: the markup is malformed.
      if (depth < 0) return null
      if (depth === 0 && start >= 0) {
        items.push(text.slice(start, tokenRe.lastIndex).trim())
        start = -1
      }
      continue
    }

    if (depth === 0) start = match.index

    if (!token.endsWith("/>")) {
      depth++
    } else if (depth === 0) {
      // A self-closing element at the top level is a complete item by itself.
      items.push(text.slice(start, tokenRe.lastIndex).trim())
      start = -1
    }
  }

  return depth === 0 && items.length >= 2 ? items : null
}
196
+
57
197
  /**
58
198
  * Apply splitting to an array of blocks, returning a flat array.
59
199
  */