@intuned/browser-dev 0.1.17-dev.0 → 0.1.17-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/optimized-extractors/common/extractStructuredDataUsingClaude.js +3 -3
- package/dist/optimized-extractors/common/extractStructuredDataUsingOpenAi.js +5 -5
- package/dist/optimized-extractors/common/extractStrucutredDataUsingAiInstance.js +4 -4
- package/dist/optimized-extractors/common/index.js +0 -6
- package/dist/optimized-extractors/common/utils.js +9 -0
- package/package.json +1 -1
- package/accumulate_llm_cost.py +0 -120
|
@@ -96,6 +96,9 @@ async function extractStructuredDataUsingClaude(input) {
|
|
|
96
96
|
return (0, _neverthrow.err)(response.error);
|
|
97
97
|
}
|
|
98
98
|
const unwrappedResponse = response.value.data;
|
|
99
|
+
const costInCents = (0, _utils.parseCostInCents)(response.value.response.headers.get("x-ai-cost-in-cents"));
|
|
100
|
+
const totalTokens = (((_unwrappedResponse$us = unwrappedResponse.usage) === null || _unwrappedResponse$us === void 0 ? void 0 : _unwrappedResponse$us.input_tokens) ?? 0) + (((_unwrappedResponse$us2 = unwrappedResponse.usage) === null || _unwrappedResponse$us2 === void 0 ? void 0 : _unwrappedResponse$us2.output_tokens) ?? 0);
|
|
101
|
+
(0, _utils.logAiCallUsage)(costInCents, totalTokens);
|
|
99
102
|
if (unwrappedResponse.stop_reason === "max_tokens") {
|
|
100
103
|
return (0, _neverthrow.err)(Errors.AiCallFailed("response from ai exceeds model maximum output tokens, try to be more specific with what data you need to extract"));
|
|
101
104
|
}
|
|
@@ -117,9 +120,6 @@ async function extractStructuredDataUsingClaude(input) {
|
|
|
117
120
|
return (0, _neverthrow.err)(Errors.invalidExtractionResult("the model was not able to extract data correctly"));
|
|
118
121
|
}
|
|
119
122
|
const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, tool.input);
|
|
120
|
-
const callCost = response.value.response.headers.get("x-ai-cost-in-cents");
|
|
121
|
-
const costInCents = (0, _utils.parseCostInCents)(callCost);
|
|
122
|
-
const totalTokens = (((_unwrappedResponse$us = unwrappedResponse.usage) === null || _unwrappedResponse$us === void 0 ? void 0 : _unwrappedResponse$us.input_tokens) ?? 0) + (((_unwrappedResponse$us2 = unwrappedResponse.usage) === null || _unwrappedResponse$us2 === void 0 ? void 0 : _unwrappedResponse$us2.output_tokens) ?? 0);
|
|
123
123
|
return (0, _neverthrow.ok)({
|
|
124
124
|
result,
|
|
125
125
|
costInCents,
|
|
@@ -92,14 +92,17 @@ async function extractStructuredDataUsingOpenAi(input) {
|
|
|
92
92
|
if (completion.isErr()) {
|
|
93
93
|
return (0, _neverthrow.err)(completion.error);
|
|
94
94
|
}
|
|
95
|
+
const costInCents = (0, _utils.parseCostInCents)(completion.value.response.headers.get("x-ai-cost-in-cents"));
|
|
96
|
+
const totalTokens = (_completion$value$dat = completion.value.data.usage) === null || _completion$value$dat === void 0 ? void 0 : _completion$value$dat.total_tokens;
|
|
97
|
+
(0, _utils.logAiCallUsage)(costInCents, totalTokens);
|
|
95
98
|
if (completion.value.data.choices[0].finish_reason === "length") {
|
|
96
99
|
return (0, _neverthrow.err)(Errors.AiCallFailed("response from ai exceeds model maximum output tokens, try to be more specific with what data you need to extract"));
|
|
97
100
|
}
|
|
98
|
-
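const noDataFound = (_completion$value$dat = completion.value.data.choices[0].message.tool_calls) === null || _completion$value$dat === void 0 ? void 0 : _completion$value$dat.some(content => content.type === "function" && content.function.name == "no_data_found");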
|
|
101
|
+
const noDataFound = (_completion$value$dat2 = completion.value.data.choices[0].message.tool_calls) === null || _completion$value$dat2 === void 0 ? void 0 : _completion$value$dat2.some(content => content.type === "function" && content.function.name == "no_data_found");
|
|
99
102
|
if (noDataFound) {
|
|
100
103
|
return (0, _neverthrow.err)(Errors.NoDataFound("data isn't found in the text or images."));
|
|
101
104
|
}
|
|
102
|
-
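let functionCall = (_completion$value$dat2 = completion.value.data.choices[0].message.tool_calls) === null || _completion$value$dat2 === void 0 || (_completion$value$dat2 = _completion$value$dat2.find(t => t.type === "function" && t.function.name === toolName)) === null || _completion$value$dat2 === void 0 ? void 0 : _completion$value$dat2.function;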
|
|
105
|
+
let functionCall = (_completion$value$dat3 = completion.value.data.choices[0].message.tool_calls) === null || _completion$value$dat3 === void 0 || (_completion$value$dat3 = _completion$value$dat3.find(t => t.type === "function" && t.function.name === toolName)) === null || _completion$value$dat3 === void 0 ? void 0 : _completion$value$dat3.function;
|
|
103
106
|
if (!functionCall) {
|
|
104
107
|
functionCall = completion.value.data.choices[0].message.function_call;
|
|
105
108
|
if (!functionCall) {
|
|
@@ -116,9 +119,6 @@ async function extractStructuredDataUsingOpenAi(input) {
|
|
|
116
119
|
}
|
|
117
120
|
const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, parsedData.value);
|
|
118
121
|
const formatted = (0, _utils.cleanupAiResult)(result);
|
|
119
|
-
const callCost = completion.value.response.headers.get("x-ai-cost-in-cents");
|
|
120
|
-
const costInCents = (0, _utils.parseCostInCents)(callCost);
|
|
121
|
-
const totalTokens = (_completion$value$dat3 = completion.value.data.usage) === null || _completion$value$dat3 === void 0 ? void 0 : _completion$value$dat3.total_tokens;
|
|
122
122
|
return (0, _neverthrow.ok)({
|
|
123
123
|
result: formatted,
|
|
124
124
|
costInCents,
|
|
@@ -10,7 +10,7 @@ var _utils = require("./utils");
|
|
|
10
10
|
var _ai = require("ai");
|
|
11
11
|
function _interopRequireWildcard(e, t) { if ("function" == typeof WeakMap) var r = new WeakMap(), n = new WeakMap(); return (_interopRequireWildcard = function (e, t) { if (!t && e && e.__esModule) return e; var o, i, f = { __proto__: null, default: e }; if (null === e || "object" != typeof e && "function" != typeof e) return f; if (o = t ? n : r) { if (o.has(e)) return o.get(e); o.set(e, f); } for (const t in e) "default" !== t && {}.hasOwnProperty.call(e, t) && ((i = (o = Object.defineProperty) && Object.getOwnPropertyDescriptor(e, t)) && (i.get || i.set) ? o(f, t, i) : f[t] = e[t]); return f; })(e, t); }
|
|
12
12
|
async function extractStructuredDataUsingAiInstance(input) {
|
|
13
|
-
var _apiResult$value$tool, _apiResult$value$tool2, _apiResult$value$resp, _apiResult$value$usag;
|
|
13
|
+
var _apiResult$value$resp, _apiResult$value$usag, _apiResult$value$tool, _apiResult$value$tool2;
|
|
14
14
|
const {
|
|
15
15
|
entityName,
|
|
16
16
|
model,
|
|
@@ -85,6 +85,9 @@ async function extractStructuredDataUsingAiInstance(input) {
|
|
|
85
85
|
if (apiResult.isErr()) {
|
|
86
86
|
return (0, _neverthrow.err)(apiResult.error);
|
|
87
87
|
}
|
|
88
|
+
const costInCents = (0, _utils.parseCostInCents)((_apiResult$value$resp = apiResult.value.response.headers) === null || _apiResult$value$resp === void 0 ? void 0 : _apiResult$value$resp["x-ai-cost-in-cents"]);
|
|
89
|
+
const totalTokens = (_apiResult$value$usag = apiResult.value.usage) === null || _apiResult$value$usag === void 0 ? void 0 : _apiResult$value$usag.totalTokens;
|
|
90
|
+
(0, _utils.logAiCallUsage)(costInCents, totalTokens);
|
|
88
91
|
if (apiResult.value.finishReason === "length") {
|
|
89
92
|
return (0, _neverthrow.err)(Errors.AiCallFailed("response from ai exceeds model maximum output tokens, try to be more specific with what data you need to extract"));
|
|
90
93
|
}
|
|
@@ -102,9 +105,6 @@ async function extractStructuredDataUsingAiInstance(input) {
|
|
|
102
105
|
}
|
|
103
106
|
const result = (0, _utils.getResultFromOutputSchema)(originalJsonSchema, entityName, extractedData);
|
|
104
107
|
const formatted = (0, _utils.cleanupAiResult)(result);
|
|
105
|
-
const callCost = (_apiResult$value$resp = apiResult.value.response.headers) === null || _apiResult$value$resp === void 0 ? void 0 : _apiResult$value$resp["x-ai-cost-in-cents"];
|
|
106
|
-
const costInCents = (0, _utils.parseCostInCents)(callCost);
|
|
107
|
-
const totalTokens = (_apiResult$value$usag = apiResult.value.usage) === null || _apiResult$value$usag === void 0 ? void 0 : _apiResult$value$usag.totalTokens;
|
|
108
108
|
return (0, _neverthrow.ok)({
|
|
109
109
|
result: formatted,
|
|
110
110
|
costInCents,
|
|
@@ -13,7 +13,6 @@ var _extractStructuredDataUsingOpenAi = require("./extractStructuredDataUsingOpe
|
|
|
13
13
|
var _utils = require("./utils");
|
|
14
14
|
var _extractStructuredDataUsingGoogle = require("./extractStructuredDataUsingGoogle");
|
|
15
15
|
var _getModelProvider = require("../../common/getModelProvider");
|
|
16
|
-
var _Logger = require("../../common/Logger");
|
|
17
16
|
function isClaudeModel(model) {
|
|
18
17
|
return (0, _getModelProvider.getModelProvider)(model) === "anthropic";
|
|
19
18
|
}
|
|
@@ -52,11 +51,6 @@ async function extractStructuredDataUsingAi(input) {
|
|
|
52
51
|
costInCents,
|
|
53
52
|
totalTokens
|
|
54
53
|
} = extractionResult.value;
|
|
55
|
-
if (costInCents !== undefined) {
|
|
56
|
-
_Logger.logger.info(`Total LLM Cost In Cents: ${costInCents}`);
|
|
57
|
-
} else if (totalTokens !== undefined) {
|
|
58
|
-
_Logger.logger.info(`Total LLM Tokens: ${totalTokens}`);
|
|
59
|
-
}
|
|
60
54
|
return (0, _neverthrow.ok)({
|
|
61
55
|
result: (0, _utils.cleanupAiResult)(result),
|
|
62
56
|
costInCents,
|
|
@@ -6,8 +6,10 @@ Object.defineProperty(exports, "__esModule", {
|
|
|
6
6
|
exports.cleanupAiResult = cleanupAiResult;
|
|
7
7
|
exports.getRandomItems = getRandomItems;
|
|
8
8
|
exports.getResultFromOutputSchema = getResultFromOutputSchema;
|
|
9
|
+
exports.logAiCallUsage = logAiCallUsage;
|
|
9
10
|
exports.parseCostInCents = parseCostInCents;
|
|
10
11
|
exports.processInputSchema = processInputSchema;
|
|
12
|
+
var _Logger = require("../../common/Logger");
|
|
11
13
|
function getRandomItems(arr, numItems) {
|
|
12
14
|
const shuffled = arr.sort(() => 0.5 - Math.random());
|
|
13
15
|
return shuffled.slice(0, numItems);
|
|
@@ -17,6 +19,13 @@ function parseCostInCents(headerValue) {
|
|
|
17
19
|
const cost = parseFloat(headerValue);
|
|
18
20
|
return isNaN(cost) ? undefined : cost;
|
|
19
21
|
}
|
|
22
|
+
function logAiCallUsage(costInCents, totalTokens) {
|
|
23
|
+
if (costInCents !== undefined) {
|
|
24
|
+
_Logger.logger.info(`Total LLM Cost In Cents: ${costInCents}`);
|
|
25
|
+
} else if (totalTokens !== undefined) {
|
|
26
|
+
_Logger.logger.info(`Total LLM Tokens: ${totalTokens}`);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
20
29
|
function processInputSchema(originalJsonSchema, entityName) {
|
|
21
30
|
const internalSchema = structuredClone(originalJsonSchema);
|
|
22
31
|
delete internalSchema.description;
|
package/package.json
CHANGED
package/accumulate_llm_cost.py
DELETED
|
@@ -1,120 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Accumulate total LLM cost (in cents) across all runs in a results JSONL file.
|
|
4
|
-
|
|
5
|
-
For each line in the results JSONL:
|
|
6
|
-
- read the `log_url` field
|
|
7
|
-
- download that logs JSONL
|
|
8
|
-
- find every log line whose message contains "Total LLM Cost In Cents: <number>"
|
|
9
|
-
- sum all those numbers
|
|
10
|
-
|
|
11
|
-
Usage:
|
|
12
|
-
python accumulate_llm_cost.py /path/to/results.jsonl
|
|
13
|
-
python accumulate_llm_cost.py /path/to/results.jsonl --workers 16
|
|
14
|
-
"""
|
|
15
|
-
|
|
16
|
-
import argparse
|
|
17
|
-
import json
|
|
18
|
-
import re
|
|
19
|
-
import sys
|
|
20
|
-
import urllib.request
|
|
21
|
-
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
22
|
-
|
|
23
|
-
# Matches: "Total LLM Cost In Cents: 01.1" -> captures "01.1"
|
|
24
|
-
COST_RE = re.compile(r"Total LLM Cost In Cents:\s*([0-9]*\.?[0-9]+)", re.IGNORECASE)
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def download_text(url: str, timeout: int = 120) -> str:
|
|
28
|
-
req = urllib.request.Request(url, headers={"User-Agent": "llm-cost-accumulator"})
|
|
29
|
-
with urllib.request.urlopen(req, timeout=timeout) as resp:
|
|
30
|
-
return resp.read().decode("utf-8", errors="replace")
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def costs_from_log_text(text: str):
|
|
34
|
-
"""Yield every cost value found in a logs JSONL blob."""
|
|
35
|
-
for line in text.splitlines():
|
|
36
|
-
line = line.strip()
|
|
37
|
-
if not line:
|
|
38
|
-
continue
|
|
39
|
-
message = None
|
|
40
|
-
try:
|
|
41
|
-
obj = json.loads(line)
|
|
42
|
-
message = obj.get("message")
|
|
43
|
-
except json.JSONDecodeError:
|
|
44
|
-
# fall back to scanning the raw line
|
|
45
|
-
message = line
|
|
46
|
-
if not message:
|
|
47
|
-
continue
|
|
48
|
-
for m in COST_RE.finditer(str(message)):
|
|
49
|
-
yield float(m.group(1))
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def process_run(record: dict):
|
|
53
|
-
"""Return (label, total_cents, hit_count, error) for one results record."""
|
|
54
|
-
api = record.get("apiInfo", {})
|
|
55
|
-
name = api.get("name", "?")
|
|
56
|
-
run_id = api.get("runId") or record.get("projectJobRun", {}).get("id", "?")
|
|
57
|
-
label = f"{name} / {run_id}"
|
|
58
|
-
|
|
59
|
-
log_url = api.get("log_url")
|
|
60
|
-
if not log_url:
|
|
61
|
-
return label, 0.0, 0, "no log_url"
|
|
62
|
-
|
|
63
|
-
try:
|
|
64
|
-
text = download_text(log_url)
|
|
65
|
-
except Exception as e: # noqa: BLE001
|
|
66
|
-
return label, 0.0, 0, f"download failed: {e}"
|
|
67
|
-
|
|
68
|
-
costs = list(costs_from_log_text(text))
|
|
69
|
-
return label, sum(costs), len(costs), None
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def main():
|
|
73
|
-
parser = argparse.ArgumentParser(description=__doc__)
|
|
74
|
-
parser.add_argument("results", help="Path to the results JSONL file")
|
|
75
|
-
parser.add_argument("--workers", type=int, default=8, help="Parallel downloads")
|
|
76
|
-
parser.add_argument("--quiet", action="store_true", help="Only print the grand total")
|
|
77
|
-
args = parser.parse_args()
|
|
78
|
-
|
|
79
|
-
with open(args.results, "r", encoding="utf-8") as f:
|
|
80
|
-
records = []
|
|
81
|
-
for ln, line in enumerate(f, 1):
|
|
82
|
-
line = line.strip()
|
|
83
|
-
if not line:
|
|
84
|
-
continue
|
|
85
|
-
try:
|
|
86
|
-
records.append(json.loads(line))
|
|
87
|
-
except json.JSONDecodeError as e:
|
|
88
|
-
print(f" [warn] skipping malformed line {ln}: {e}", file=sys.stderr)
|
|
89
|
-
|
|
90
|
-
if not args.quiet:
|
|
91
|
-
print(f"Loaded {len(records)} run(s) from {args.results}\n")
|
|
92
|
-
|
|
93
|
-
grand_total = 0.0
|
|
94
|
-
total_hits = 0
|
|
95
|
-
errors = 0
|
|
96
|
-
|
|
97
|
-
with ThreadPoolExecutor(max_workers=args.workers) as pool:
|
|
98
|
-
futures = {pool.submit(process_run, rec): rec for rec in records}
|
|
99
|
-
for fut in as_completed(futures):
|
|
100
|
-
label, total, hits, err = fut.result()
|
|
101
|
-
if err:
|
|
102
|
-
errors += 1
|
|
103
|
-
if not args.quiet:
|
|
104
|
-
print(f" [error] {label}: {err}")
|
|
105
|
-
continue
|
|
106
|
-
grand_total += total
|
|
107
|
-
total_hits += hits
|
|
108
|
-
if not args.quiet:
|
|
109
|
-
print(f" {label}: {total:.4f} cents ({hits} entr{'y' if hits == 1 else 'ies'})")
|
|
110
|
-
|
|
111
|
-
print("\n" + "=" * 60)
|
|
112
|
-
print(f"Runs processed : {len(records)}")
|
|
113
|
-
print(f"Cost entries : {total_hits}")
|
|
114
|
-
print(f"Errors : {errors}")
|
|
115
|
-
print(f"TOTAL LLM COST : {grand_total:.4f} cents (${grand_total / 100:.4f})")
|
|
116
|
-
print("=" * 60)
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
if __name__ == "__main__":
|
|
120
|
-
main()
|