@intuned/browser-dev 0.1.16-dev.1 → 0.1.17-dev.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/accumulate_llm_cost.py +120 -0
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +6 -12
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +11 -15
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +7 -16
- package/dist/optimized-extractors/common/index.js +12 -2
- package/dist/optimized-extractors/common/utils.js +6 -0
- package/package.json +1 -1
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Accumulate total LLM cost (in cents) across all runs in a results JSONL file.
|
|
4
|
+
|
|
5
|
+
For each line in the results JSONL:
|
|
6
|
+
- read the `log_url` field
|
|
7
|
+
- download that logs JSONL
|
|
8
|
+
- find every log line whose message contains "Total LLM Cost In Cents: <number>"
|
|
9
|
+
- sum all those numbers
|
|
10
|
+
|
|
11
|
+
Usage:
|
|
12
|
+
python accumulate_llm_cost.py /path/to/results.jsonl
|
|
13
|
+
python accumulate_llm_cost.py /path/to/results.jsonl --workers 16
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import argparse
|
|
17
|
+
import json
|
|
18
|
+
import re
|
|
19
|
+
import sys
|
|
20
|
+
import urllib.request
|
|
21
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
22
|
+
|
|
23
|
+
# Matches the cost marker inside a log message and captures the number, e.g.
#   "Total LLM Cost In Cents: 01.1" -> captures "01.1"
#   "Total LLM Cost In Cents: 5e-7" -> captures "5e-7"
# The optional exponent matters: the marker is written by JavaScript string
# interpolation, which renders very small (and very large) numbers in exponent
# notation. Without it "5e-7" would be captured as "5" and overcount the cost.
COST_RE = re.compile(
    r"Total LLM Cost In Cents:\s*([0-9]*\.?[0-9]+(?:[eE][-+]?[0-9]+)?)",
    re.IGNORECASE,
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def download_text(url: str, timeout: int = 120) -> str:
    """Fetch ``url`` and return the response body as text.

    The request is sent with a fixed ``User-Agent`` header and ``timeout``
    is handed to ``urlopen``. The body is decoded as UTF-8; bytes that
    cannot be decoded are replaced instead of raising.
    """
    request = urllib.request.Request(
        url,
        headers={"User-Agent": "llm-cost-accumulator"},
    )
    response = urllib.request.urlopen(request, timeout=timeout)
    with response:
        raw_body = response.read()
    return raw_body.decode("utf-8", errors="replace")
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def costs_from_log_text(text: str, *, pattern=None):
    """Yield every cost value found in a logs JSONL blob.

    Each non-blank line is parsed as JSON. For a JSON object only its
    ``message`` field is scanned. Lines that are not valid JSON, or that
    parse to something other than an object (a bare number, string, list,
    ...), are scanned as raw text instead of raising.

    Args:
        text: Contents of a logs file, one entry per line.
        pattern: Compiled regex whose first group captures the number.
            Defaults to the module-level ``COST_RE``.

    Yields:
        Each captured number as a ``float``, in order of appearance.
    """
    cost_re = COST_RE if pattern is None else pattern
    for raw_line in text.splitlines():
        entry = raw_line.strip()
        if not entry:
            continue
        try:
            parsed = json.loads(entry)
        except json.JSONDecodeError:
            # Not JSON at all: fall back to scanning the raw line.
            message = entry
        else:
            # Only objects carry a "message" field; calling .get() on any
            # other JSON value would raise AttributeError and abort the run.
            message = parsed.get("message") if isinstance(parsed, dict) else entry
        if not message:
            continue
        # str() keeps non-string messages (numbers, nested objects) scannable.
        for match in cost_re.finditer(str(message)):
            yield float(match.group(1))
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def process_run(record: dict):
    """Return (label, total_cents, hit_count, error) for one results record.

    The label is ``"<name> / <run id>"``, built from ``apiInfo.name`` and
    ``apiInfo.runId`` (falling back to ``projectJobRun.id``); ``"?"`` stands
    in for anything missing. The logs at ``apiInfo.log_url`` are downloaded
    and every cost entry in them is summed.

    Problems are reported through the ``error`` element rather than raised:
    a record that is not a JSON object, a missing ``log_url`` and a failed
    download all yield ``(label, 0.0, 0, "<reason>")``. On success ``error``
    is ``None``.
    """
    if not isinstance(record, dict):
        return "? / ?", 0.0, 0, "record is not a JSON object"

    # A key may be present but null (or otherwise not an object); treat that
    # the same as a missing key instead of failing on `.get`.
    api = record.get("apiInfo")
    if not isinstance(api, dict):
        api = {}
    job_run = record.get("projectJobRun")
    if not isinstance(job_run, dict):
        job_run = {}

    name = api.get("name", "?")
    run_id = api.get("runId") or job_run.get("id", "?")
    label = f"{name} / {run_id}"

    log_url = api.get("log_url")
    if not log_url:
        return label, 0.0, 0, "no log_url"

    try:
        text = download_text(log_url)
    except Exception as e:  # noqa: BLE001 - any download problem becomes an error row
        return label, 0.0, 0, f"download failed: {e}"

    costs = list(costs_from_log_text(text))
    return label, sum(costs), len(costs), None
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def main():
    """Command-line entry point: total the LLM cost of every run in a results file.

    Reads the results JSONL given on the command line (malformed lines are
    skipped with a warning on stderr), processes the runs on a thread pool
    of ``--workers`` threads, and prints a per-run line plus a summary.
    ``--quiet`` suppresses the per-run lines.

    A run whose worker raises is counted as an error instead of aborting the
    whole accumulation, so the totals of the other runs are still reported.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("results", help="Path to the results JSONL file")
    parser.add_argument("--workers", type=int, default=8, help="Parallel downloads")
    parser.add_argument("--quiet", action="store_true", help="Only print the grand total")
    args = parser.parse_args()
    if args.workers < 1:
        # ThreadPoolExecutor rejects max_workers <= 0 with a bare ValueError;
        # report it as a normal usage error instead.
        parser.error("--workers must be at least 1")

    records = []
    with open(args.results, "r", encoding="utf-8") as f:
        for ln, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as e:
                print(f" [warn] skipping malformed line {ln}: {e}", file=sys.stderr)

    if not args.quiet:
        print(f"Loaded {len(records)} run(s) from {args.results}\n")

    grand_total = 0.0
    total_hits = 0
    errors = 0

    with ThreadPoolExecutor(max_workers=args.workers) as pool:
        futures = [pool.submit(process_run, rec) for rec in records]
        # Results are consumed in completion order, not file order.
        for fut in as_completed(futures):
            try:
                label, total, hits, err = fut.result()
            except Exception as e:  # noqa: BLE001 - one bad run must not lose the rest
                label, total, hits, err = "<unknown run>", 0.0, 0, f"worker failed: {e}"
            if err:
                errors += 1
                if not args.quiet:
                    print(f" [error] {label}: {err}")
                continue
            grand_total += total
            total_hits += hits
            if not args.quiet:
                print(f" {label}: {total:.4f} cents ({hits} entr{'y' if hits == 1 else 'ies'})")

    print("\n" + "=" * 60)
    print(f"Runs processed : {len(records)}")
    print(f"Cost entries : {total_hits}")
    print(f"Errors : {errors}")
    print(f"TOTAL LLM COST : {grand_total:.4f} cents (${grand_total / 100:.4f})")
    print("=" * 60)
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
if __name__ == "__main__":
|
|
120
|
+
main()
|
|
@@ -8,10 +8,10 @@ var _anthropicModel = require("../models/anthropicModel");
|
|
|
8
8
|
var _neverthrow = require("neverthrow");
|
|
9
9
|
var Errors = _interopRequireWildcard(require("../types/errors"));
|
|
10
10
|
var _utils = require("./utils");
|
|
11
|
-
var _Logger = require("../../common/Logger");
|
|
12
11
|
var _aiModelsValidations = require("../common/aiModelsValidations");
|
|
13
12
|
// Module-interop helper (looks like Babel's generated `interopRequireWildcard` — confirm against the build config).
// First call: creates two WeakMap caches when WeakMap exists, then replaces itself with the inner function and calls it.
// Inner function: returns `e` unchanged when `t` is falsy and `e.__esModule` is set. Otherwise it builds a
// null-prototype wrapper `{ default: e }`; null / non-object / non-function values get that wrapper as-is, anything
// else is cached per `e` (cache picked by `t`) and has its own enumerable properties except "default" copied over,
// keeping getter/setter descriptors intact.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
14
13
|
async function extractStructuredDataUsingClaude(input) {
|
|
14
|
+
var _unwrappedResponse$us, _unwrappedResponse$us2;
|
|
15
15
|
const {
|
|
16
16
|
entityName,
|
|
17
17
|
model,
|
|
@@ -118,17 +118,11 @@ async function extractStructuredDataUsingClaude(input) {
|
|
|
118
118
|
}
|
|
119
119
|
const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, tool.input);
|
|
120
120
|
const callCost = response.value.response.headers.get("x-ai-cost-in-cents");
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
_Logger.logger.info(`extractor ${input.identifier}: AI cost is not calculated (using custom API key)`);
|
|
124
|
-
} else if (callCost) {
|
|
125
|
-
const cost = parseFloat(callCost);
|
|
126
|
-
if (!isNaN(cost)) {
|
|
127
|
-
_Logger.logger.info(`extractor ${input.identifier}: AI cost is $${cost / 100}`);
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
}
|
|
121
|
+
const costInCents = (0, _utils.parseCostInCents)(callCost);
|
|
122
|
+
const totalTokens = (((_unwrappedResponse$us = unwrappedResponse.usage) === null || _unwrappedResponse$us === void 0 ? void 0 : _unwrappedResponse$us.input_tokens) ?? 0) + (((_unwrappedResponse$us2 = unwrappedResponse.usage) === null || _unwrappedResponse$us2 === void 0 ? void 0 : _unwrappedResponse$us2.output_tokens) ?? 0);
|
|
131
123
|
return (0, _neverthrow.ok)({
|
|
132
|
-
result
|
|
124
|
+
result,
|
|
125
|
+
costInCents,
|
|
126
|
+
totalTokens
|
|
133
127
|
});
|
|
134
128
|
}
|
|
@@ -7,11 +7,10 @@ exports.extractStructuredDataUsingOpenAi = extractStructuredDataUsingOpenAi;
|
|
|
7
7
|
var _neverthrow = require("neverthrow");
|
|
8
8
|
var Errors = _interopRequireWildcard(require("../types/errors"));
|
|
9
9
|
var _utils = require("./utils");
|
|
10
|
-
var _Logger = require("../../common/Logger");
|
|
11
10
|
var _openaiModel = require("../models/openaiModel");
|
|
12
11
|
// Module-interop helper (looks like Babel's generated `interopRequireWildcard` — confirm against the build config).
// First call: creates two WeakMap caches when WeakMap exists, then replaces itself with the inner function and calls it.
// Inner function: returns `e` unchanged when `t` is falsy and `e.__esModule` is set. Otherwise it builds a
// null-prototype wrapper `{ default: e }`; null / non-object / non-function values get that wrapper as-is, anything
// else is cached per `e` (cache picked by `t`) and has its own enumerable properties except "default" copied over,
// keeping getter/setter descriptors intact.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
13
12
|
async function extractStructuredDataUsingOpenAi(input) {
|
|
14
|
-
var _completion$value$dat, _completion$value$dat2;
|
|
13
|
+
var _completion$value$dat, _completion$value$dat2, _completion$value$dat3;
|
|
15
14
|
const {
|
|
16
15
|
entityName,
|
|
17
16
|
model,
|
|
@@ -50,13 +49,16 @@ async function extractStructuredDataUsingOpenAi(input) {
|
|
|
50
49
|
content.push(...imageContent);
|
|
51
50
|
}
|
|
52
51
|
const modelName = input.model;
|
|
52
|
+
const supportsCustomTemperature = !/^(o\d|gpt-5)/i.test(modelName);
|
|
53
53
|
const toolName = `extract_${entityName}`;
|
|
54
54
|
const openAiInstance = (0, _openaiModel.createOpenAIInstance)({
|
|
55
55
|
apiKey
|
|
56
56
|
});
|
|
57
57
|
const completion = await (0, _neverthrow.fromPromise)(openAiInstance.chat.completions.create({
|
|
58
|
-
|
|
59
|
-
|
|
58
|
+
max_completion_tokens: 4000,
|
|
59
|
+
...(supportsCustomTemperature ? {
|
|
60
|
+
temperature: 0
|
|
61
|
+
} : {}),
|
|
60
62
|
model: modelName,
|
|
61
63
|
messages: [{
|
|
62
64
|
role: "system",
|
|
@@ -115,17 +117,11 @@ async function extractStructuredDataUsingOpenAi(input) {
|
|
|
115
117
|
const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, parsedData.value);
|
|
116
118
|
const formatted = (0, _utils.cleanupAiResult)(result);
|
|
117
119
|
const callCost = completion.value.response.headers.get("x-ai-cost-in-cents");
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
_Logger.logger.info(`extractor ${input.identifier}: AI cost is not calculated (using custom API key)`);
|
|
121
|
-
} else if (callCost) {
|
|
122
|
-
const cost = parseFloat(callCost);
|
|
123
|
-
if (!isNaN(cost)) {
|
|
124
|
-
_Logger.logger.info(`extractor ${input.identifier}: AI cost is $${cost / 100}`);
|
|
125
|
-
}
|
|
126
|
-
}
|
|
127
|
-
}
|
|
120
|
+
const costInCents = (0, _utils.parseCostInCents)(callCost);
|
|
121
|
+
const totalTokens = (_completion$value$dat3 = completion.value.data.usage) === null || _completion$value$dat3 === void 0 ? void 0 : _completion$value$dat3.total_tokens;
|
|
128
122
|
return (0, _neverthrow.ok)({
|
|
129
|
-
result: formatted
|
|
123
|
+
result: formatted,
|
|
124
|
+
costInCents,
|
|
125
|
+
totalTokens
|
|
130
126
|
});
|
|
131
127
|
}
|
|
@@ -7,11 +7,10 @@ exports.extractStructuredDataUsingAiInstance = extractStructuredDataUsingAiInsta
|
|
|
7
7
|
var _neverthrow = require("neverthrow");
|
|
8
8
|
var Errors = _interopRequireWildcard(require("../types/errors"));
|
|
9
9
|
var _utils = require("./utils");
|
|
10
|
-
var _Logger = require("../../common/Logger");
|
|
11
10
|
var _ai = require("ai");
|
|
12
11
|
// Module-interop helper (looks like Babel's generated `interopRequireWildcard` — confirm against the build config).
// First call: creates two WeakMap caches when WeakMap exists, then replaces itself with the inner function and calls it.
// Inner function: returns `e` unchanged when `t` is falsy and `e.__esModule` is set. Otherwise it builds a
// null-prototype wrapper `{ default: e }`; null / non-object / non-function values get that wrapper as-is, anything
// else is cached per `e` (cache picked by `t`) and has its own enumerable properties except "default" copied over,
// keeping getter/setter descriptors intact.
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
13
12
|
async function extractStructuredDataUsingAiInstance(input) {
|
|
14
|
-
var _apiResult$value$tool, _apiResult$value$tool2, _apiResult$value$resp;
|
|
13
|
+
var _apiResult$value$tool, _apiResult$value$tool2, _apiResult$value$resp, _apiResult$value$usag;
|
|
15
14
|
const {
|
|
16
15
|
entityName,
|
|
17
16
|
model,
|
|
@@ -20,9 +19,7 @@ async function extractStructuredDataUsingAiInstance(input) {
|
|
|
20
19
|
text,
|
|
21
20
|
extraUserMessages,
|
|
22
21
|
images,
|
|
23
|
-
|
|
24
|
-
apiName,
|
|
25
|
-
maxTokens
|
|
22
|
+
apiName
|
|
26
23
|
} = input;
|
|
27
24
|
const processedJsonSchema = (0, _utils.processInputSchema)(originalJsonSchema, entityName);
|
|
28
25
|
const content = [];
|
|
@@ -106,17 +103,11 @@ async function extractStructuredDataUsingAiInstance(input) {
|
|
|
106
103
|
const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, extractedData);
|
|
107
104
|
const formatted = (0, _utils.cleanupAiResult)(result);
|
|
108
105
|
const callCost = (_apiResult$value$resp = apiResult.value.response.headers) === null || _apiResult$value$resp === void 0 ? void 0 : _apiResult$value$resp["x-ai-cost-in-cents"];
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
_Logger.logger.info(`extractor ${input.identifier}: AI cost is not calculated (using custom API key)`);
|
|
112
|
-
} else if (callCost) {
|
|
113
|
-
const cost = parseFloat(callCost);
|
|
114
|
-
if (!isNaN(cost)) {
|
|
115
|
-
_Logger.logger.info(`extractor ${input.identifier}: AI cost is $${cost / 100}`);
|
|
116
|
-
}
|
|
117
|
-
}
|
|
118
|
-
}
|
|
106
|
+
const costInCents = (0, _utils.parseCostInCents)(callCost);
|
|
107
|
+
const totalTokens = (_apiResult$value$usag = apiResult.value.usage) === null || _apiResult$value$usag === void 0 ? void 0 : _apiResult$value$usag.totalTokens;
|
|
119
108
|
return (0, _neverthrow.ok)({
|
|
120
|
-
result: formatted
|
|
109
|
+
result: formatted,
|
|
110
|
+
costInCents,
|
|
111
|
+
totalTokens
|
|
121
112
|
});
|
|
122
113
|
}
|
|
@@ -13,6 +13,7 @@ var _extractStructuredDataUsingOpenAi = require("./extractStructuredDataUsingOpe
|
|
|
13
13
|
var _utils = require("./utils");
|
|
14
14
|
var _extractStructuredDataUsingGoogle = require("./extractStructuredDataUsingGoogle");
|
|
15
15
|
var _getModelProvider = require("../../common/getModelProvider");
|
|
16
|
+
var _Logger = require("../../common/Logger");
|
|
16
17
|
/** Report whether `model` resolves to the "anthropic" provider. */
function isClaudeModel(model) {
  const provider = (0, _getModelProvider.getModelProvider)(model);
  return provider === "anthropic";
}
|
|
@@ -47,9 +48,18 @@ async function extractStructuredDataUsingAi(input) {
|
|
|
47
48
|
return (0, _neverthrow.err)(extractionResult.error);
|
|
48
49
|
}
|
|
49
50
|
const {
|
|
50
|
-
result
|
|
51
|
+
result,
|
|
52
|
+
costInCents,
|
|
53
|
+
totalTokens
|
|
51
54
|
} = extractionResult.value;
|
|
55
|
+
if (costInCents !== undefined) {
|
|
56
|
+
_Logger.logger.info(`Total LLM Cost In Cents: ${costInCents}`);
|
|
57
|
+
} else if (totalTokens !== undefined) {
|
|
58
|
+
_Logger.logger.info(`Total LLM Tokens: ${totalTokens}`);
|
|
59
|
+
}
|
|
52
60
|
return (0, _neverthrow.ok)({
|
|
53
|
-
result: (0, _utils.cleanupAiResult)(result)
|
|
61
|
+
result: (0, _utils.cleanupAiResult)(result),
|
|
62
|
+
costInCents,
|
|
63
|
+
totalTokens
|
|
54
64
|
});
|
|
55
65
|
}
|
|
@@ -6,11 +6,17 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
exports.cleanupAiResult = cleanupAiResult;
|
|
7
7
|
exports.getRandomItems = getRandomItems;
|
|
8
8
|
exports.getResultFromOutputSchema = getResultFromOutputSchema;
|
|
9
|
+
exports.parseCostInCents = parseCostInCents;
|
|
9
10
|
exports.processInputSchema = processInputSchema;
|
|
10
11
|
/**
 * Pick up to `numItems` elements of `arr` in random order.
 *
 * Shuffles a copy, so the caller's array keeps its order, and uses a
 * Fisher-Yates shuffle. Sorting with a random comparator, as before,
 * reorders the input in place and gives an inconsistent comparison whose
 * outcome is engine-dependent and biased.
 *
 * @param {Array} arr - Source items; not modified.
 * @param {number} numItems - Used as the end index of `slice` on the shuffled copy.
 * @returns {Array} New array holding the selected items.
 */
function getRandomItems(arr, numItems) {
  const shuffled = arr.slice();
  // Walk backwards, swapping each slot with a random slot at or before it.
  for (let i = shuffled.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
  }
  return shuffled.slice(0, numItems);
}
|
|
15
|
+
/**
 * Turn a cost header value into a number of cents.
 *
 * @param {string|null|undefined} headerValue - Raw header value, if any.
 * @returns {number|undefined} The parsed cost, or `undefined` when the value
 *   is missing, empty, or does not parse to a finite number.
 */
function parseCostInCents(headerValue) {
  if (!headerValue) return undefined;
  const cost = parseFloat(headerValue);
  // Number.isFinite rejects NaN and also +/-Infinity, which isNaN lets through.
  return Number.isFinite(cost) ? cost : undefined;
}
|
|
14
20
|
function processInputSchema(originalJsonSchema, entityName) {
|
|
15
21
|
const internalSchema = structuredClone(originalJsonSchema);
|
|
16
22
|
delete internalSchema.description;
|