@maintainabilityai/research-runner 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/search/uspto-client.js +39 -3
- package/package.json +1 -1
|
@@ -71,7 +71,10 @@ async function usptoSearch(opts) {
|
|
|
71
71
|
patentNumber: num,
|
|
72
72
|
title: meta.inventionTitle ?? '',
|
|
73
73
|
abstract: '',
|
|
74
|
-
|
|
74
|
+
// earliestPublicationNumber already carries the `US` prefix (e.g.
|
|
75
|
+
// `US20260064729A1`); patentNumber is plain digits. Avoid the double
|
|
76
|
+
// `USUS…` URL we were producing.
|
|
77
|
+
url: num ? `https://patents.google.com/patent/${num.startsWith('US') ? num : `US${num}`}` : '',
|
|
75
78
|
grantedAt: meta.grantDate || meta.filingDate || meta.effectiveFilingDate || '',
|
|
76
79
|
inventors: meta.firstInventorName ? [meta.firstInventorName] : [],
|
|
77
80
|
_xmlUri: xmlUri,
|
|
@@ -80,16 +83,39 @@ async function usptoSearch(opts) {
|
|
|
80
83
|
// Stage 2: parallel best-effort abstract fetch. The full-text XML carries
|
|
81
84
|
// the <abstract> element; we regex it out rather than parsing the whole
|
|
82
85
|
// document (the XML is large and we only want the abstract).
|
|
86
|
+
//
|
|
87
|
+
// Telemetry: count how many we attempted and how many succeeded so the
|
|
88
|
+
// archeologist progress log can surface "uspto abstracts: 3/5" instead of
|
|
89
|
+
// silently shipping empty `>` blockquotes to the synth agent.
|
|
90
|
+
let attempted = 0;
|
|
91
|
+
let succeeded = 0;
|
|
92
|
+
let missingUri = 0;
|
|
93
|
+
const failureCauses = [];
|
|
83
94
|
await Promise.all(stage1.map(async (r) => {
|
|
84
95
|
if (!r._xmlUri) {
|
|
96
|
+
missingUri += 1;
|
|
85
97
|
return;
|
|
86
98
|
}
|
|
99
|
+
attempted += 1;
|
|
87
100
|
try {
|
|
88
101
|
const xmlRes = await fetchImpl(r._xmlUri, {
|
|
89
102
|
method: 'GET',
|
|
90
|
-
|
|
103
|
+
// Mirror the NCMS reference exactly: same headers dict on the
|
|
104
|
+
// search call AND the XML fetch (X-API-Key + Accept: json). My
|
|
105
|
+
// earlier "drop X-API-Key on stage 2" was wrong — the CDN does
|
|
106
|
+
// gate on it, and removing it produced http403 across the
|
|
107
|
+
// board (seen in #49 logs: "0/5 fetched, failures: http403 ×5").
|
|
108
|
+
// The Accept header looks wrong-shape (json for an XML doc) but
|
|
109
|
+
// the upstream ignores it and returns XML regardless; matches
|
|
110
|
+
// the NCMS pattern that works in production.
|
|
111
|
+
headers: {
|
|
112
|
+
accept: 'application/json',
|
|
113
|
+
'X-API-Key': opts.apiKey,
|
|
114
|
+
'user-agent': 'maintainabilityai-research-runner/1.0',
|
|
115
|
+
},
|
|
91
116
|
});
|
|
92
117
|
if (!xmlRes.ok) {
|
|
118
|
+
failureCauses.push(`http${xmlRes.status}`);
|
|
93
119
|
return;
|
|
94
120
|
}
|
|
95
121
|
const xml = await xmlRes.text();
|
|
@@ -97,10 +123,20 @@ async function usptoSearch(opts) {
|
|
|
97
123
|
if (m) {
|
|
98
124
|
const stripped = m[1].replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
|
|
99
125
|
r.abstract = stripped.slice(0, 1000);
|
|
126
|
+
succeeded += 1;
|
|
100
127
|
}
|
|
128
|
+
else {
|
|
129
|
+
failureCauses.push('no-abstract-tag');
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
catch (err) {
|
|
133
|
+
failureCauses.push(err instanceof Error ? err.name : 'unknown');
|
|
101
134
|
}
|
|
102
|
-
catch { /* ignore — best-effort */ }
|
|
103
135
|
}));
|
|
136
|
+
if (records.length > 0 && process.env.RESEARCH_RUNNER_QUIET !== '1') {
|
|
137
|
+
process.stderr.write(`[research-runner] uspto abstracts: ${succeeded}/${attempted} fetched ` +
|
|
138
|
+
`(${missingUri} record(s) had no XML URI; failures: ${failureCauses.join(',') || 'none'})\n`);
|
|
139
|
+
}
|
|
104
140
|
// Drop the internal _xmlUri marker before returning.
|
|
105
141
|
const results = stage1.map(({ _xmlUri: _ignored, ...rest }) => rest);
|
|
106
142
|
return {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@maintainabilityai/research-runner",
|
|
3
|
-
"version": "0.1.15",
|
|
3
|
+
"version": "0.1.17",
|
|
4
4
|
"description": "Research + PRD agent runner — orchestrates the Archeologist and PRD pipelines for the MaintainabilityAI governance mesh",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "MaintainabilityAI",
|