@semiont/make-meaning 0.5.6 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -32
- package/dist/index.d.ts +198 -23
- package/dist/index.js +756 -224
- package/dist/index.js.map +1 -1
- package/dist/smelter-main.js +552 -244
- package/dist/smelter-main.js.map +1 -1
- package/package.json +12 -12
package/dist/smelter-main.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { createTomlConfigLoader, accessToken, baseUrl as baseUrl$1, burstBuffer, errField, resourceId, textExtractionOf, decodeRepresentation, getTargetSelector, getExactText, annotationId, getPrimaryMediaType, getPrimaryRepresentation } from '@semiont/core';
|
|
2
|
+
import { calculateChecksum } from '@semiont/content';
|
|
3
3
|
import { createEmbeddingProvider, createVectorStore, chunkText } from '@semiont/vectors';
|
|
4
|
+
import { registerVectorIndexSizeProvider, withActorSpan } from '@semiont/observability';
|
|
5
|
+
import { busRequest } from '@semiont/sdk';
|
|
6
|
+
import { HttpTransport, HttpContentTransport } from '@semiont/http-transport';
|
|
4
7
|
import { createServer } from 'http';
|
|
5
8
|
import { existsSync, readFileSync } from 'fs';
|
|
6
9
|
import { homedir } from 'os';
|
|
@@ -14,7 +17,11 @@ var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
|
14
17
|
var __getProtoOf = Object.getPrototypeOf;
|
|
15
18
|
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
16
19
|
/**
 * Bundler runtime helper: wraps a CommonJS module factory in a lazy,
 * memoising `require` function.
 *
 * `cb` is an object whose first own property is the module factory,
 * called as `factory(exports, module)`. The module record is cached in
 * the closed-over `mod` after the first call. If anything throws, the
 * cache is reset to a falsy value before rethrowing, so the next call
 * runs the factory again instead of handing back a half-initialised
 * module.
 */
var __commonJS = (cb, mod) => function __require() {
  try {
    if (!mod) {
      // Look the factory up before creating the record, then call it
      // unbound, mirroring the `(0, fn)(...)` call form.
      const factory = cb[__getOwnPropNames(cb)[0]];
      mod = { exports: {} };
      factory(mod.exports, mod);
    }
    return mod.exports;
  } catch (e) {
    mod = 0;
    throw e;
  }
};
|
|
19
26
|
var __copyProps = (to, from2, except, desc) => {
|
|
20
27
|
if (from2 && typeof from2 === "object" || typeof from2 === "function") {
|
|
@@ -699,19 +706,19 @@ var require_Observable = __commonJS({
|
|
|
699
706
|
var config_1 = require_config();
|
|
700
707
|
var isFunction_1 = require_isFunction();
|
|
701
708
|
var errorContext_1 = require_errorContext();
|
|
702
|
-
var
|
|
703
|
-
function
|
|
709
|
+
var Observable3 = (function() {
|
|
710
|
+
function Observable4(subscribe) {
|
|
704
711
|
if (subscribe) {
|
|
705
712
|
this._subscribe = subscribe;
|
|
706
713
|
}
|
|
707
714
|
}
|
|
708
|
-
|
|
709
|
-
var observable = new
|
|
715
|
+
Observable4.prototype.lift = function(operator) {
|
|
716
|
+
var observable = new Observable4();
|
|
710
717
|
observable.source = this;
|
|
711
718
|
observable.operator = operator;
|
|
712
719
|
return observable;
|
|
713
720
|
};
|
|
714
|
-
|
|
721
|
+
Observable4.prototype.subscribe = function(observerOrNext, error, complete) {
|
|
715
722
|
var _this = this;
|
|
716
723
|
var subscriber = isSubscriber(observerOrNext) ? observerOrNext : new Subscriber_1.SafeSubscriber(observerOrNext, error, complete);
|
|
717
724
|
errorContext_1.errorContext(function() {
|
|
@@ -720,14 +727,14 @@ var require_Observable = __commonJS({
|
|
|
720
727
|
});
|
|
721
728
|
return subscriber;
|
|
722
729
|
};
|
|
723
|
-
|
|
730
|
+
Observable4.prototype._trySubscribe = function(sink) {
|
|
724
731
|
try {
|
|
725
732
|
return this._subscribe(sink);
|
|
726
733
|
} catch (err) {
|
|
727
734
|
sink.error(err);
|
|
728
735
|
}
|
|
729
736
|
};
|
|
730
|
-
|
|
737
|
+
Observable4.prototype.forEach = function(next, promiseCtor) {
|
|
731
738
|
var _this = this;
|
|
732
739
|
promiseCtor = getPromiseCtor(promiseCtor);
|
|
733
740
|
return new promiseCtor(function(resolve, reject) {
|
|
@@ -746,21 +753,21 @@ var require_Observable = __commonJS({
|
|
|
746
753
|
_this.subscribe(subscriber);
|
|
747
754
|
});
|
|
748
755
|
};
|
|
749
|
-
|
|
756
|
+
Observable4.prototype._subscribe = function(subscriber) {
|
|
750
757
|
var _a;
|
|
751
758
|
return (_a = this.source) === null || _a === void 0 ? void 0 : _a.subscribe(subscriber);
|
|
752
759
|
};
|
|
753
|
-
|
|
760
|
+
Observable4.prototype[observable_1.observable] = function() {
|
|
754
761
|
return this;
|
|
755
762
|
};
|
|
756
|
-
|
|
763
|
+
Observable4.prototype.pipe = function() {
|
|
757
764
|
var operations = [];
|
|
758
765
|
for (var _i = 0; _i < arguments.length; _i++) {
|
|
759
766
|
operations[_i] = arguments[_i];
|
|
760
767
|
}
|
|
761
768
|
return pipe_1.pipeFromArray(operations)(this);
|
|
762
769
|
};
|
|
763
|
-
|
|
770
|
+
Observable4.prototype.toPromise = function(promiseCtor) {
|
|
764
771
|
var _this = this;
|
|
765
772
|
promiseCtor = getPromiseCtor(promiseCtor);
|
|
766
773
|
return new promiseCtor(function(resolve, reject) {
|
|
@@ -774,12 +781,12 @@ var require_Observable = __commonJS({
|
|
|
774
781
|
});
|
|
775
782
|
});
|
|
776
783
|
};
|
|
777
|
-
|
|
778
|
-
return new
|
|
784
|
+
Observable4.create = function(subscribe) {
|
|
785
|
+
return new Observable4(subscribe);
|
|
779
786
|
};
|
|
780
|
-
return
|
|
787
|
+
return Observable4;
|
|
781
788
|
})();
|
|
782
|
-
exports.Observable =
|
|
789
|
+
exports.Observable = Observable3;
|
|
783
790
|
function getPromiseCtor(promiseCtor) {
|
|
784
791
|
var _a;
|
|
785
792
|
return (_a = promiseCtor !== null && promiseCtor !== void 0 ? promiseCtor : config_1.config.Promise) !== null && _a !== void 0 ? _a : Promise;
|
|
@@ -9716,8 +9723,7 @@ var require_operators = __commonJS({
|
|
|
9716
9723
|
});
|
|
9717
9724
|
|
|
9718
9725
|
// src/smelter-main.ts
|
|
9719
|
-
var
|
|
9720
|
-
var import_operators2 = __toESM(require_operators());
|
|
9726
|
+
var import_rxjs3 = __toESM(require_cjs());
|
|
9721
9727
|
|
|
9722
9728
|
// src/smelter-actor-state-unit.ts
|
|
9723
9729
|
var import_rxjs = __toESM(require_cjs());
|
|
@@ -9746,7 +9752,6 @@ function createSmelterActorStateUnit(options) {
|
|
|
9746
9752
|
);
|
|
9747
9753
|
return {
|
|
9748
9754
|
events$,
|
|
9749
|
-
emit: (channel, payload) => bus.emit(channel, payload),
|
|
9750
9755
|
start: () => {
|
|
9751
9756
|
if (started) return;
|
|
9752
9757
|
started = true;
|
|
@@ -9758,8 +9763,493 @@ function createSmelterActorStateUnit(options) {
|
|
|
9758
9763
|
};
|
|
9759
9764
|
}
|
|
9760
9765
|
|
|
9761
|
-
// src/smelter
|
|
9762
|
-
var
|
|
9766
|
+
// src/smelter.ts
|
|
9767
|
+
var import_rxjs2 = __toESM(require_cjs());
|
|
9768
|
+
var import_operators2 = __toESM(require_operators());
|
|
9769
|
+
|
|
9770
|
+
// src/batch-utils.ts
|
|
9771
|
+
/**
 * Split an ordered event sequence into maximal runs of consecutive events
 * that share the same `type`. Order is preserved; a type that reappears
 * after a different type starts a new run.
 *
 * @param {Iterable<{type: string}>} events - events in arrival order
 * @returns {Array<Array<{type: string}>>} runs in original order (empty for empty input)
 */
function partitionByType(events) {
  const groups = [];
  for (const item of events) {
    const tail = groups[groups.length - 1];
    if (tail !== undefined && tail[0].type === item.type) {
      // Same type as the run in progress: extend it.
      tail.push(item);
    } else {
      // First event, or the type changed: open a new run.
      groups.push([item]);
    }
  }
  return groups;
}
|
|
9784
|
+
|
|
9785
|
+
// src/smelter.ts
|
|
9786
|
+
/**
 * Tell reconcile work items apart from other pipeline inputs: work items
 * are the inputs whose `type` begins with the `smelt:` prefix.
 *
 * @param {{type: string}} input - a pipeline input
 * @returns {boolean} true when `input.type` starts with "smelt:"
 */
function isWorkItem(input) {
  const kind = input.type;
  const isReconcileWork = kind.startsWith("smelt:");
  return isReconcileWork;
}
|
|
9789
|
+
/**
 * Smelter — keeps a vector store in step with resource and annotation
 * events, and can reconcile the store against the catalog.
 *
 * Inputs (bus events and `smelt:*` reconcile work items) are pushed into
 * one internal subject, grouped per `resourceId`, and handled one at a
 * time inside each group; different groups run concurrently.
 *
 * Collaborators, as used by this class:
 *  - `events$`            observable of incoming events ({ type, resourceId, payload })
 *  - `vectorStore`        upsert/delete/list operations for resource and annotation vectors
 *  - `embeddingProvider`  `embed(text)` and `embedBatch(texts)`
 *  - `content`            `getBinary(resourceId)` resolving to `{ data, contentType }`
 *  - `bus`                transport handed to `busRequest` for `browse:*` RPCs
 *  - `chunkingConfig`     passed through to `chunkText`
 *  - `timing`             `{ burstWindowMs, maxBatchSize, idleTimeoutMs }` for `burstBuffer`
 *  - `logger`             `debug` / `info` / `warn` / `error`
 */
var Smelter = class _Smelter {
  constructor(events$, vectorStore, embeddingProvider, content, bus, chunkingConfig2, timing, logger2) {
    this.events$ = events$;
    this.vectorStore = vectorStore;
    this.embeddingProvider = embeddingProvider;
    this.content = content;
    this.bus = bus;
    this.chunkingConfig = chunkingConfig2;
    this.timing = timing;
    this.logger = logger2;
  }
  events$;
  vectorStore;
  embeddingProvider;
  content;
  bus;
  chunkingConfig;
  timing;
  logger;
  /** Page size used when listing the catalog during reconcile. */
  static RECONCILE_PAGE_SIZE = 200;
  /** Bound on concurrently in-flight reconcile work — a cold rebuild must not fan out unbounded embedding calls. */
  static RECONCILE_WAVE = 8;
  /** Mailbox: both live events and reconcile work items enter the pipeline here. */
  eventSubject = new import_rxjs2.Subject();
  sourceSubscription = null;
  pipelineSubscription = null;
  _eventsProcessed = 0;
  _reconcileState = { phase: "pending" };
  /** Running count of consumed work items (success or failure). */
  workDone = 0;
  /** Single-slot waiter `{ target, resolve, reject }` for the wave currently draining. */
  workWaiter = null;
  /** Set by stop(); lets drain() fail fast instead of feeding a completed subject. */
  stopped = false;
  /** Number of non-work-item events processed without error. */
  get eventsProcessed() {
    return this._eventsProcessed;
  }
  /** Reconcile status: `{ phase: "pending" | "running" | "done" | "failed", summary?, error? }`. */
  get reconcileState() {
    return this._reconcileState;
  }
  /**
   * Build the processing pipeline and attach it to `events$`.
   *
   * Calling this while a pipeline is already attached is a no-op (logged).
   * Without the guard, the first pipeline subscription would leak and
   * `events$` would be subscribed twice, so every event would be processed
   * twice.
   */
  initialize() {
    if (this.pipelineSubscription) {
      this.logger.warn("Smelter.initialize() called more than once; keeping the existing pipeline");
      return;
    }
    const { groupBy, mergeMap, concatMap } = import_operators2;
    // NOTE(review): groupBy is used without a duration selector, so per the
    // RxJS docs each distinct resourceId keeps its group open until the
    // subject completes — confirm that growth is acceptable for a
    // long-running process.
    this.pipelineSubscription = this.eventSubject.pipe(
      groupBy((e) => e.resourceId ?? "__unknown__"),
      mergeMap(
        (group) => group.pipe(
          burstBuffer({
            burstWindowMs: this.timing.burstWindowMs,
            maxBatchSize: this.timing.maxBatchSize,
            idleTimeoutMs: this.timing.idleTimeoutMs
          }),
          // concatMap keeps handling strictly sequential within one group.
          // burstBuffer's output is handled as either one input or an array.
          concatMap((inputOrBatch) => {
            if (Array.isArray(inputOrBatch)) {
              return (0, import_rxjs2.from)(
                withActorSpan("smelter", "batch", () => this.consumeBatch(inputOrBatch), { "batch.size": inputOrBatch.length })
              );
            }
            return (0, import_rxjs2.from)(
              withActorSpan("smelter", inputOrBatch.type, () => this.consumeSingle(inputOrBatch))
            );
          })
        )
      )
    ).subscribe({
      error: (err) => this.logger.error("Smelter pipeline error", { error: errField(err) })
    });
    this.sourceSubscription = this.events$.subscribe((event) => {
      this.logger.debug("Bus event received", { type: event.type, resourceId: event.resourceId });
      this.eventSubject.next(event);
    });
    this.logger.info("Smelter pipeline initialized");
  }
  /**
   * Process one batch and add its count to the processed-events counter.
   *
   * The count is awaited first and only then added. Writing
   * `counter += await ...` would read the counter before the await, and
   * increments made by other concurrently running groups in the meantime
   * would be overwritten.
   */
  async consumeBatch(batch) {
    const processed = await this.processBatch(batch);
    this._eventsProcessed += processed;
  }
  /**
   * Process one input. Work items always tick the drain counter (success
   * or failure); other events bump the processed counter only on success.
   */
  async consumeSingle(input) {
    const ok = await this.safeProcessEvent(input);
    if (isWorkItem(input)) this.noteWorkDone(1);
    else if (ok) this._eventsProcessed++;
  }
  /**
   * Detach from `events$`, tear down the pipeline and complete the mailbox.
   *
   * A reconcile that is still draining is failed: its waiter is rejected,
   * so `reconcile()` rejects instead of awaiting a completion signal that
   * can no longer arrive.
   */
  stop() {
    this.stopped = true;
    this.sourceSubscription?.unsubscribe();
    this.sourceSubscription = null;
    this.pipelineSubscription?.unsubscribe();
    this.pipelineSubscription = null;
    this.eventSubject.complete();
    const waiter = this.workWaiter;
    if (waiter) {
      this.workWaiter = null;
      waiter.reject(new Error("Smelter stopped before reconcile work finished draining"));
    }
    this.logger.info("Smelter stopped");
  }
  /** Record `count` consumed work items and release the drain waiter once its target is reached. */
  noteWorkDone(count) {
    this.workDone += count;
    if (this.workWaiter && this.workDone >= this.workWaiter.target) {
      this.workWaiter.resolve();
      this.workWaiter = null;
    }
  }
  /**
   * Returns the number of WIRE events processed without error (the S9b
   * oracle) — `smelt:*` work-item runs tick the drain counter instead.
   *
   * The batch is split into runs of consecutive same-type events. A failing
   * run is logged and does not stop later runs; work-item runs tick the
   * drain counter in `finally`, whether or not the run succeeded.
   */
  async processBatch(events) {
    let wireProcessed = 0;
    for (const run of partitionByType(events)) {
      const workRun = isWorkItem(run[0]);
      try {
        if (run.length === 1) {
          const ok = await this.safeProcessEvent(run[0]);
          if (ok && !workRun) wireProcessed++;
        } else {
          const processed = await this.applyBatchByType(run);
          if (!workRun) wireProcessed += processed;
        }
      } catch (error) {
        this.logger.error("Smelter failed to process batch run", {
          eventType: run[0].type,
          runSize: run.length,
          error: errField(error)
        });
      } finally {
        if (workRun) this.noteWorkDone(run.length);
      }
    }
    return wireProcessed;
  }
  /**
   * Batch-optimized processing for consecutive events of the same type.
   * Returns the number of events processed without error. Types without a
   * batch handler are processed one by one.
   */
  async applyBatchByType(events) {
    switch (events[0].type) {
      case "yield:created":
      case "smelt:embed":
        return this.batchResourceCreated(events);
      case "mark:added":
      case "smelt:embed-annotation":
        return this.batchAnnotationAdded(events);
      default: {
        let processed = 0;
        for (const event of events) {
          if (await this.safeProcessEvent(event)) processed++;
        }
        return processed;
      }
    }
  }
  /** Returns true if the input was processed without error; failures are logged, never thrown. */
  async safeProcessEvent(event) {
    try {
      await this.processEvent(event);
      return true;
    } catch (err) {
      this.logger.error("Smelter failed to process event", {
        type: event.type,
        resourceId: event.resourceId,
        error: errField(err)
      });
      return false;
    }
  }
  /** Dispatch one input to its handler by `type`; unknown types are ignored. */
  async processEvent(event) {
    switch (event.type) {
      case "yield:created":
        await this.embedResource(event, "Indexed resource");
        break;
      case "yield:updated":
      case "yield:representation-added":
        await this.embedResource(event, "Re-embedded resource");
        break;
      case "mark:archived":
        await this.handleResourceArchived(event);
        break;
      case "mark:added":
        await this.handleAnnotationAdded(event);
        break;
      case "mark:removed":
        await this.handleAnnotationRemoved(event);
        break;
      // Reconcile work items — same handlers, distinct provenance.
      case "smelt:embed":
        await this.embedResource(event, "Reconcile-indexed resource");
        break;
      case "smelt:purge":
        await this.handleResourcePurge(event);
        break;
      case "smelt:embed-annotation":
        await this.handleAnnotationAdded(event);
        break;
      case "smelt:purge-annotation":
        await this.handleAnnotationRemoved(event);
        break;
    }
  }
  /** Delete the resource vectors of a resource reconcile found indexed but not embeddable in the catalog. */
  async handleResourcePurge(event) {
    const rid = event.resourceId;
    if (!rid) return;
    await this.vectorStore.deleteResourceVectors(resourceId(rid));
    this.logger.info("Reconcile deleted orphan resource vectors", { resourceId: rid });
  }
  /**
   * Resolve a resource's embeddable text: bytes via the content transport,
   * gated to media types that decode as text, decoded charset-aware. The
   * checksum is over the raw bytes actually read — stamped onto the vectors
   * so reconciliation can compare against the catalog's claim (S12). Returns
   * null (logged) when the resource doesn't decode as text, is unavailable,
   * or is empty — callers skip it.
   */
  async fetchEmbeddableText(resourceId$1) {
    try {
      const { data, contentType } = await this.content.getBinary(resourceId(resourceId$1));
      if (textExtractionOf(contentType) !== "decode") {
        this.logger.debug("Skipping resource that does not decode as text", { resourceId: resourceId$1, contentType });
        return null;
      }
      const bytes = Buffer.from(data);
      const text = decodeRepresentation(bytes, contentType);
      return text.trim() ? { text, checksum: calculateChecksum(bytes) } : null;
    } catch (error) {
      this.logger.warn("Content unavailable for embedding", { resourceId: resourceId$1, error: errField(error) });
      return null;
    }
  }
  /**
   * Fetch, chunk and embed one resource, then upsert its vectors together
   * with the content checksum. Does nothing when there is no resource id,
   * no embeddable text, or no chunks. `logMessage` is the info-log message
   * written on success.
   */
  async embedResource(event, logMessage) {
    const rid = event.resourceId;
    if (!rid) return;
    const fetched = await this.fetchEmbeddableText(rid);
    if (!fetched) return;
    const chunks = chunkText(fetched.text, this.chunkingConfig);
    if (chunks.length === 0) return;
    const embeddings = await this.embeddingProvider.embedBatch(chunks);
    const embeddingChunks = chunks.map((t, i) => ({
      chunkIndex: i,
      text: t,
      embedding: embeddings[i]
    }));
    await this.vectorStore.upsertResourceVectors(resourceId(rid), embeddingChunks, fetched.checksum);
    this.logger.info(logMessage, { resourceId: rid, chunks: chunks.length });
  }
  /** Remove both the resource vectors and the annotation vectors of an archived resource. */
  async handleResourceArchived(event) {
    const rid = event.resourceId;
    if (!rid) return;
    await this.vectorStore.deleteResourceVectors(resourceId(rid));
    await this.vectorStore.deleteAnnotationVectorsForResource(resourceId(rid));
    this.logger.info("Deleted vectors for archived resource", { resourceId: rid });
  }
  /**
   * Embed an annotation's exact text and upsert its vector. Skips
   * annotations without an id, without a resource id, or whose selector
   * yields no non-blank exact text.
   */
  async handleAnnotationAdded(event) {
    const annotation = event.payload.annotation;
    if (!annotation?.id) return;
    const rid = event.resourceId;
    if (!rid) return;
    const selector = getTargetSelector(annotation.target);
    const exactText = getExactText(selector);
    if (!exactText?.trim()) return;
    const aid = annotationId(annotation.id);
    const embedding2 = await this.embeddingProvider.embed(exactText);
    const payload = {
      annotationId: aid,
      resourceId: resourceId(rid),
      motivation: annotation.motivation ?? "",
      entityTypes: annotation.entityTypes ?? [],
      exactText
    };
    await this.vectorStore.upsertAnnotationVector(aid, embedding2, payload);
    this.logger.info("Indexed annotation", { annotationId: String(aid) });
  }
  /** Delete the vector of the annotation named by `event.payload.annotationId`. */
  async handleAnnotationRemoved(event) {
    const annotationId$1 = event.payload.annotationId;
    if (!annotationId$1) return;
    const aid = annotationId(annotationId$1);
    await this.vectorStore.deleteAnnotationVector(aid);
    this.logger.info("Deleted annotation vector", { annotationId: annotationId$1 });
  }
  /**
   * Batch-embed chunks from multiple yield:created events in a single
   * embedBatch() call, then index per resource.
   *
   * Returns `events.length`: events skipped for having no id, no text or
   * no chunks still count as processed.
   */
  async batchResourceCreated(events) {
    const resourceData = [];
    const allChunks = [];
    for (const event of events) {
      const rid = event.resourceId;
      if (!rid) continue;
      const fetched = await this.fetchEmbeddableText(rid);
      if (!fetched) continue;
      const chunks = chunkText(fetched.text, this.chunkingConfig);
      if (chunks.length === 0) continue;
      resourceData.push({ rid: resourceId(rid), chunks, checksum: fetched.checksum });
      allChunks.push(...chunks);
    }
    if (allChunks.length === 0) return events.length;
    const allEmbeddings = await this.embeddingProvider.embedBatch(allChunks);
    // Embeddings come back flat; `offset` walks them in the same order the
    // chunks were appended so each resource gets its own slice.
    let offset = 0;
    for (const { rid, chunks, checksum } of resourceData) {
      const embeddingChunks = chunks.map((t, i) => ({
        chunkIndex: i,
        text: t,
        embedding: allEmbeddings[offset + i]
      }));
      await this.vectorStore.upsertResourceVectors(rid, embeddingChunks, checksum);
      this.logger.info("Batch-indexed resource", { resourceId: String(rid), chunks: chunks.length });
      offset += chunks.length;
    }
    return events.length;
  }
  /**
   * Batch-embed exact texts from multiple mark:added events in a single
   * embedBatch() call, then index per annotation.
   *
   * Returns `events.length`: events skipped for missing ids or blank exact
   * text still count as processed.
   */
  async batchAnnotationAdded(events) {
    const annotationData = [];
    for (const event of events) {
      const annotation = event.payload.annotation;
      if (!annotation?.id) continue;
      const rid = event.resourceId;
      if (!rid) continue;
      const selector = getTargetSelector(annotation.target);
      const exactText = getExactText(selector);
      if (!exactText?.trim()) continue;
      annotationData.push({
        rid: resourceId(rid),
        aid: annotationId(annotation.id),
        exactText,
        motivation: annotation.motivation ?? "",
        entityTypes: annotation.entityTypes ?? []
      });
    }
    if (annotationData.length === 0) return events.length;
    const allEmbeddings = await this.embeddingProvider.embedBatch(
      annotationData.map((a) => a.exactText)
    );
    for (let i = 0; i < annotationData.length; i++) {
      const { rid, aid, exactText, motivation, entityTypes } = annotationData[i];
      const payload = {
        annotationId: aid,
        resourceId: rid,
        motivation,
        entityTypes,
        exactText
      };
      await this.vectorStore.upsertAnnotationVector(aid, allEmbeddings[i], payload);
      this.logger.info("Batch-indexed annotation", { annotationId: String(aid) });
    }
    return events.length;
  }
  // ── Reconciliation ───────────────────────────────────────────────────
  /**
   * Reconcile the vector store against the KS catalog.
   *
   * Lists what IS indexed (via the store's id enumeration) and what SHOULD
   * be (non-archived resources with embeddable media types, plus their
   * exact-text annotations, via the `browse:*` RPC channels), then plans the
   * diff as `smelt:*` work items — embeds for what's missing, purges for
   * what shouldn't be there — and drains them through the pipeline mailbox.
   * Work items share the per-resource lanes with live events, so a reconcile
   * re-embed can never interleave with (or stale-overwrite) live processing
   * of the same resource (axioms S1/S2). Waves of RECONCILE_WAVE bound how
   * many embedding calls a cold rebuild has in flight.
   *
   * Call after the live subscription is attached so nothing falls in the
   * gap. The index snapshot is taken BEFORE the catalog listing so a
   * resource indexed by a live event mid-reconcile is never mistaken for an
   * orphan; convergence holds because every upsert replaces a resource's
   * full vector set from current content.
   *
   * The returned summary counts PLANNED work items per kind; items whose
   * processing failed (logged by the pipeline) are still counted.
   *
   * Rejects when the pipeline is not initialized, when another reconcile is
   * already running (the drain waiter is a single slot, so overlapping runs
   * would strand each other), when a `browse:*` request fails, or when the
   * smelter is stopped mid-drain.
   */
  async reconcile() {
    if (!this.pipelineSubscription) {
      throw new Error("Smelter.reconcile() requires initialize() \u2014 work items drain through the pipeline");
    }
    if (this._reconcileState.phase === "running") {
      // Thrown before the try block so the running reconcile's state is untouched.
      throw new Error("Smelter.reconcile() is already running");
    }
    this._reconcileState = { phase: "running" };
    try {
      const [indexedResources, indexedAnnotations] = await Promise.all([
        this.vectorStore.listResourceChecksums(),
        this.vectorStore.listAnnotationIds()
      ]);
      const resources = await this.listAllResources();
      this.logger.info("Reconcile started", {
        indexedResources: indexedResources.size,
        indexedAnnotations: indexedAnnotations.size,
        liveResources: resources.length
      });
      // Resource id -> catalog checksum (possibly undefined) for every
      // resource whose primary media type decodes as text.
      const embeddable = /* @__PURE__ */ new Map();
      for (const resource of resources) {
        const mediaType = getPrimaryMediaType(resource);
        if (resource["@id"] && mediaType && textExtractionOf(mediaType) === "decode") {
          embeddable.set(resource["@id"], getPrimaryRepresentation(resource)?.checksum);
        }
      }
      const work = [];
      for (const rid of indexedResources.keys()) {
        if (!embeddable.has(rid)) work.push({ type: "smelt:purge", resourceId: rid, payload: {} });
      }
      for (const [rid, catalogChecksum] of embeddable) {
        if (!indexedResources.has(rid)) {
          work.push({ type: "smelt:embed", resourceId: rid, payload: {} });
        } else if (catalogChecksum !== void 0 && indexedResources.get(rid) !== catalogChecksum) {
          // Indexed, but from different bytes than the catalog now claims.
          work.push({ type: "smelt:embed", resourceId: rid, payload: {} });
        }
      }
      const liveAnnotationIds = /* @__PURE__ */ new Set();
      for (const resource of resources) {
        const rid = resource["@id"];
        if (!rid) continue;
        const { annotations } = await busRequest(
          this.bus,
          "browse:annotations-requested",
          { resourceId: rid },
          "browse:annotations-result",
          "browse:annotations-failed"
        );
        for (const annotation of annotations) {
          const exactText = getExactText(getTargetSelector(annotation.target));
          if (!annotation.id || !exactText?.trim()) continue;
          liveAnnotationIds.add(annotation.id);
          if (!indexedAnnotations.has(annotation.id)) {
            work.push({ type: "smelt:embed-annotation", resourceId: rid, payload: { annotation } });
          }
        }
      }
      for (const aid of indexedAnnotations) {
        if (!liveAnnotationIds.has(aid)) {
          // The annotation id doubles as `resourceId` here, which is the
          // pipeline's grouping key.
          work.push({ type: "smelt:purge-annotation", resourceId: aid, payload: { annotationId: aid } });
        }
      }
      await this.drain(work);
      const summary = {
        resourcesEmbedded: work.filter((w) => w.type === "smelt:embed").length,
        resourceVectorsDeleted: work.filter((w) => w.type === "smelt:purge").length,
        annotationsEmbedded: work.filter((w) => w.type === "smelt:embed-annotation").length,
        annotationVectorsDeleted: work.filter((w) => w.type === "smelt:purge-annotation").length
      };
      this._reconcileState = { phase: "done", summary };
      this.logger.info("Reconcile complete", { ...summary });
      return summary;
    } catch (error) {
      this._reconcileState = {
        phase: "failed",
        error: error instanceof Error ? error.message : String(error)
      };
      this.logger.error("Reconcile failed", { error: errField(error) });
      throw error;
    }
  }
  /**
   * Enqueue planner work through the mailbox in bounded waves and await
   * completion. The pipeline ticks `noteWorkDone` for every consumed work
   * item (success or failure — failures are logged like any live event), so
   * each wave's waiter resolves exactly when its items have been processed.
   *
   * Rejects if the smelter has been stopped, or is stopped while a wave is
   * in flight: a completed mailbox ignores further `next()` calls, so
   * waiting on it would never finish.
   */
  async drain(work) {
    for (let i = 0; i < work.length; i += _Smelter.RECONCILE_WAVE) {
      if (this.stopped) {
        throw new Error("Smelter stopped before reconcile work finished draining");
      }
      const wave = work.slice(i, i + _Smelter.RECONCILE_WAVE);
      // Register the waiter before enqueueing so no completion tick is missed.
      const done = new Promise((resolve, reject) => {
        this.workWaiter = { target: this.workDone + wave.length, resolve, reject };
      });
      for (const item of wave) this.eventSubject.next(item);
      await done;
    }
  }
  /** Page through `browse:resources-requested` until the catalog is exhausted. */
  async listAllResources() {
    const all = [];
    for (; ; ) {
      const page = await busRequest(
        this.bus,
        "browse:resources-requested",
        { archived: false, offset: all.length, limit: _Smelter.RECONCILE_PAGE_SIZE },
        "browse:resources-result",
        "browse:resources-failed"
      );
      all.push(...page.resources);
      // An empty page ends the loop even if `total` overstates the catalog.
      if (page.resources.length === 0 || all.length >= page.total) return all;
    }
  }
};
|
|
9763
10253
|
var configPath = join(homedir(), ".semiontconfig");
|
|
9764
10254
|
var tomlReader = {
|
|
9765
10255
|
readIfExists: (p) => existsSync(p) ? readFileSync(p, "utf-8") : null
|
|
@@ -9796,9 +10286,6 @@ var chunkingConfig = {
|
|
|
9796
10286
|
};
|
|
9797
10287
|
var workerSecret = process.env.SEMIONT_WORKER_SECRET ?? "";
|
|
9798
10288
|
var healthPort = 9091;
|
|
9799
|
-
var BURST_WINDOW_MS = 50;
|
|
9800
|
-
var MAX_BATCH_SIZE = 100;
|
|
9801
|
-
var IDLE_TIMEOUT_MS = 200;
|
|
9802
10289
|
var logger = createProcessLogger("smelter");
|
|
9803
10290
|
async function authenticate() {
|
|
9804
10291
|
if (!workerSecret) {
|
|
@@ -9820,250 +10307,70 @@ async function authenticate() {
|
|
|
9820
10307
|
const { token } = await response.json();
|
|
9821
10308
|
return token;
|
|
9822
10309
|
}
|
|
9823
|
-
var authToken = "";
|
|
9824
|
-
/**
 * Fetch a resource's content as plain text from the backend API.
 *
 * Best-effort: every failure yields `null` so callers can skip the
 * resource, and every failure is logged rather than silently dropped.
 * The body read is awaited inside the `try`, so a failure while reading
 * the response body is handled like any other fetch failure instead of
 * escaping as a rejection.
 *
 * @param {string} resourceId - id of the resource to fetch
 * @returns {Promise<string|null>} the response text, or null when the
 *   request fails, the response is not OK, or the body cannot be read
 */
async function fetchContent(resourceId) {
  try {
    const response = await fetch(`${baseUrl}/api/resources/${resourceId}`, {
      headers: {
        Authorization: `Bearer ${authToken}`,
        Accept: "text/plain"
      }
    });
    if (!response.ok) {
      logger.warn("Content fetch returned a non-OK response", { resourceId, status: response.status });
      return null;
    }
    return await response.text();
  } catch (err) {
    logger.warn("Content unavailable for embedding", {
      resourceId,
      error: err instanceof Error ? err.message : String(err)
    });
    return null;
  }
}
|
|
9838
|
-
var vectorStore;
|
|
9839
|
-
var embeddingProvider;
|
|
9840
|
-
var eventsProcessed = 0;
|
|
9841
|
-
/**
 * Route one bus event to its handler by `event.type` and count it.
 *
 * The module-level `eventsProcessed` counter is bumped for every event
 * whose handling did not throw, including event types with no handler.
 * Handler failures are logged and never propagate to the caller.
 *
 * @param {{type: string, resourceId?: string, payload?: object}} event
 * @returns {Promise<void>}
 */
async function processEvent(event) {
  try {
    const kind = event.type;
    if (kind === "yield:created") {
      await handleResourceCreated(event);
    } else if (kind === "yield:updated" || kind === "yield:representation-added") {
      await handleResourceReembed(event);
    } else if (kind === "mark:archived") {
      await handleResourceArchived(event);
    } else if (kind === "mark:added") {
      await handleAnnotationAdded(event);
    } else if (kind === "mark:removed") {
      await handleAnnotationRemoved(event);
    }
    eventsProcessed += 1;
  } catch (failure) {
    const details = {
      type: event.type,
      resourceId: event.resourceId,
      error: failure instanceof Error ? failure.message : String(failure)
    };
    logger.error("Failed to process event", details);
  }
}
|
|
9866
|
-
/**
 * Index a newly created resource: fetch its text, chunk it, embed all
 * chunks in one batch call and upsert the resulting vectors.
 *
 * Returns without doing anything when the event has no resource id, the
 * content is missing or blank, or chunking yields nothing.
 *
 * @param {{resourceId?: string}} event
 * @returns {Promise<void>}
 */
async function handleResourceCreated(event) {
  const { resourceId: rid } = event;
  if (!rid) return;

  const body = await fetchContent(rid);
  if (!body?.trim()) return;

  const pieces = chunkText(body, chunkingConfig);
  if (pieces.length === 0) return;

  const vectors = await embeddingProvider.embedBatch(pieces);
  // Pair each chunk with the embedding at the same index.
  const records = pieces.map((piece, index) => ({
    chunkIndex: index,
    text: piece,
    embedding: vectors[index]
  }));

  await vectorStore.upsertResourceVectors(resourceId(rid), records);
  logger.info("Indexed resource", { resourceId: rid, chunks: pieces.length });
}
|
|
9882
|
-
async function handleResourceReembed(event) {
|
|
9883
|
-
const rid = event.resourceId;
|
|
9884
|
-
if (!rid) return;
|
|
9885
|
-
const text = await fetchContent(rid);
|
|
9886
|
-
if (!text?.trim()) return;
|
|
9887
|
-
const chunks = chunkText(text, chunkingConfig);
|
|
9888
|
-
if (chunks.length === 0) return;
|
|
9889
|
-
const embeddings = await embeddingProvider.embedBatch(chunks);
|
|
9890
|
-
const embeddingChunks = chunks.map((t, i) => ({
|
|
9891
|
-
chunkIndex: i,
|
|
9892
|
-
text: t,
|
|
9893
|
-
embedding: embeddings[i]
|
|
9894
|
-
}));
|
|
9895
|
-
await vectorStore.deleteResourceVectors(resourceId(rid));
|
|
9896
|
-
await vectorStore.upsertResourceVectors(resourceId(rid), embeddingChunks);
|
|
9897
|
-
logger.info("Re-embedded resource", { resourceId: rid, chunks: chunks.length });
|
|
9898
|
-
}
|
|
9899
|
-
async function handleResourceArchived(event) {
|
|
9900
|
-
const rid = event.resourceId;
|
|
9901
|
-
if (!rid) return;
|
|
9902
|
-
await vectorStore.deleteResourceVectors(resourceId(rid));
|
|
9903
|
-
logger.info("Deleted vectors for archived resource", { resourceId: rid });
|
|
9904
|
-
}
|
|
9905
|
-
async function handleAnnotationAdded(event) {
|
|
9906
|
-
const annotation = event.payload.annotation;
|
|
9907
|
-
if (!annotation?.id) return;
|
|
9908
|
-
const rid = event.resourceId;
|
|
9909
|
-
if (!rid) return;
|
|
9910
|
-
const selector = getTargetSelector(annotation.target);
|
|
9911
|
-
const exactText = getExactText(selector);
|
|
9912
|
-
if (!exactText?.trim()) return;
|
|
9913
|
-
const aid = annotationId(annotation.id);
|
|
9914
|
-
const embedding2 = await embeddingProvider.embed(exactText);
|
|
9915
|
-
const payload = {
|
|
9916
|
-
annotationId: aid,
|
|
9917
|
-
resourceId: resourceId(rid),
|
|
9918
|
-
motivation: annotation.motivation ?? "",
|
|
9919
|
-
entityTypes: annotation.entityTypes ?? [],
|
|
9920
|
-
exactText
|
|
9921
|
-
};
|
|
9922
|
-
await vectorStore.upsertAnnotationVector(aid, embedding2, payload);
|
|
9923
|
-
logger.info("Indexed annotation", { annotationId: String(aid) });
|
|
9924
|
-
}
|
|
9925
|
-
async function handleAnnotationRemoved(event) {
|
|
9926
|
-
const annotationId$1 = event.payload.annotationId;
|
|
9927
|
-
if (!annotationId$1) return;
|
|
9928
|
-
const aid = annotationId(annotationId$1);
|
|
9929
|
-
await vectorStore.deleteAnnotationVector(aid);
|
|
9930
|
-
logger.info("Deleted annotation vector", { annotationId: annotationId$1 });
|
|
9931
|
-
}
|
|
9932
|
-
async function processBatch(events) {
|
|
9933
|
-
const type = events[0].type;
|
|
9934
|
-
if (type === "yield:created") {
|
|
9935
|
-
await batchResourceCreated(events);
|
|
9936
|
-
} else if (type === "mark:added") {
|
|
9937
|
-
await batchAnnotationAdded(events);
|
|
9938
|
-
} else {
|
|
9939
|
-
for (const event of events) {
|
|
9940
|
-
await processEvent(event);
|
|
9941
|
-
}
|
|
9942
|
-
}
|
|
9943
|
-
}
|
|
9944
|
-
async function batchResourceCreated(events) {
|
|
9945
|
-
const resourceData = [];
|
|
9946
|
-
const allChunks = [];
|
|
9947
|
-
for (const event of events) {
|
|
9948
|
-
const rid = event.resourceId;
|
|
9949
|
-
if (!rid) continue;
|
|
9950
|
-
const text = await fetchContent(rid);
|
|
9951
|
-
if (!text?.trim()) continue;
|
|
9952
|
-
const chunks = chunkText(text, chunkingConfig);
|
|
9953
|
-
if (chunks.length === 0) continue;
|
|
9954
|
-
resourceData.push({ rid: resourceId(rid), chunks });
|
|
9955
|
-
allChunks.push(...chunks);
|
|
9956
|
-
}
|
|
9957
|
-
if (allChunks.length === 0) return;
|
|
9958
|
-
const allEmbeddings = await embeddingProvider.embedBatch(allChunks);
|
|
9959
|
-
let offset = 0;
|
|
9960
|
-
for (const { rid, chunks } of resourceData) {
|
|
9961
|
-
const embeddingChunks = chunks.map((t, i) => ({
|
|
9962
|
-
chunkIndex: i,
|
|
9963
|
-
text: t,
|
|
9964
|
-
embedding: allEmbeddings[offset + i]
|
|
9965
|
-
}));
|
|
9966
|
-
await vectorStore.upsertResourceVectors(rid, embeddingChunks);
|
|
9967
|
-
logger.info("Batch-indexed resource", { resourceId: String(rid), chunks: chunks.length });
|
|
9968
|
-
offset += chunks.length;
|
|
9969
|
-
}
|
|
9970
|
-
eventsProcessed += events.length;
|
|
9971
|
-
}
|
|
9972
|
-
async function batchAnnotationAdded(events) {
|
|
9973
|
-
const annotationData = [];
|
|
9974
|
-
for (const event of events) {
|
|
9975
|
-
const annotation = event.payload.annotation;
|
|
9976
|
-
if (!annotation?.id) continue;
|
|
9977
|
-
const rid = event.resourceId;
|
|
9978
|
-
if (!rid) continue;
|
|
9979
|
-
const selector = getTargetSelector(annotation.target);
|
|
9980
|
-
const exactText = getExactText(selector);
|
|
9981
|
-
if (!exactText?.trim()) continue;
|
|
9982
|
-
annotationData.push({
|
|
9983
|
-
rid: resourceId(rid),
|
|
9984
|
-
aid: annotationId(annotation.id),
|
|
9985
|
-
exactText,
|
|
9986
|
-
motivation: annotation.motivation ?? "",
|
|
9987
|
-
entityTypes: annotation.entityTypes ?? []
|
|
9988
|
-
});
|
|
9989
|
-
}
|
|
9990
|
-
if (annotationData.length === 0) return;
|
|
9991
|
-
const allEmbeddings = await embeddingProvider.embedBatch(
|
|
9992
|
-
annotationData.map((a) => a.exactText)
|
|
9993
|
-
);
|
|
9994
|
-
for (let i = 0; i < annotationData.length; i++) {
|
|
9995
|
-
const { rid, aid, exactText, motivation, entityTypes } = annotationData[i];
|
|
9996
|
-
const payload = {
|
|
9997
|
-
annotationId: aid,
|
|
9998
|
-
resourceId: rid,
|
|
9999
|
-
motivation,
|
|
10000
|
-
entityTypes,
|
|
10001
|
-
exactText
|
|
10002
|
-
};
|
|
10003
|
-
await vectorStore.upsertAnnotationVector(aid, allEmbeddings[i], payload);
|
|
10004
|
-
logger.info("Batch-indexed annotation", { annotationId: String(aid) });
|
|
10005
|
-
}
|
|
10006
|
-
eventsProcessed += events.length;
|
|
10007
|
-
}
|
|
10008
10310
|
async function main() {
|
|
10009
10311
|
const { initObservabilityNode } = await import('@semiont/observability/node');
|
|
10010
10312
|
initObservabilityNode({ serviceName: "semiont-smelter" });
|
|
10011
10313
|
logger.info("Authenticating", { baseUrl });
|
|
10012
|
-
|
|
10314
|
+
const tokenSubject = new import_rxjs3.BehaviorSubject(accessToken(await authenticate()));
|
|
10013
10315
|
logger.info("Authenticated");
|
|
10014
|
-
|
|
10316
|
+
const refreshToken = async () => {
|
|
10317
|
+
const token = await authenticate();
|
|
10318
|
+
tokenSubject.next(accessToken(token));
|
|
10319
|
+
return token;
|
|
10320
|
+
};
|
|
10321
|
+
const reauthTimer = setInterval(() => {
|
|
10322
|
+
refreshToken().catch((error) => {
|
|
10323
|
+
logger.error("Proactive re-authentication failed", {
|
|
10324
|
+
error: error instanceof Error ? error.message : String(error)
|
|
10325
|
+
});
|
|
10326
|
+
});
|
|
10327
|
+
}, 12 * 60 * 60 * 1e3);
|
|
10328
|
+
const embeddingProvider = await createEmbeddingProvider({
|
|
10015
10329
|
type: embeddingType,
|
|
10016
10330
|
model: embeddingModel,
|
|
10017
10331
|
baseURL: embeddingBaseURL
|
|
10018
10332
|
});
|
|
10019
10333
|
logger.info("Embedding provider ready", { type: embeddingType, model: embeddingModel });
|
|
10020
10334
|
const dimensions = embeddingProvider.dimensions();
|
|
10021
|
-
vectorStore = await createVectorStore({
|
|
10335
|
+
const vectorStore = await createVectorStore({
|
|
10022
10336
|
type: "qdrant",
|
|
10023
10337
|
host: qdrantHost,
|
|
10024
10338
|
port: qdrantPort,
|
|
10025
10339
|
dimensions
|
|
10026
10340
|
});
|
|
10027
10341
|
logger.info("Vector store ready", { host: qdrantHost, port: qdrantPort, dimensions });
|
|
10028
|
-
|
|
10342
|
+
registerVectorIndexSizeProvider(() => vectorStore.count());
|
|
10029
10343
|
const httpTransport = new HttpTransport({
|
|
10030
10344
|
baseUrl: baseUrl$1(baseUrl),
|
|
10031
|
-
token$: tokenSubject
|
|
10345
|
+
token$: tokenSubject,
|
|
10346
|
+
tokenRefresher: refreshToken
|
|
10032
10347
|
});
|
|
10033
10348
|
const actorStateUnit = createSmelterActorStateUnit({
|
|
10034
10349
|
bus: httpTransport.actor
|
|
10035
10350
|
});
|
|
10036
|
-
const
|
|
10037
|
-
|
|
10038
|
-
|
|
10039
|
-
|
|
10040
|
-
|
|
10041
|
-
|
|
10042
|
-
|
|
10043
|
-
|
|
10044
|
-
|
|
10045
|
-
|
|
10046
|
-
|
|
10047
|
-
|
|
10048
|
-
|
|
10049
|
-
}
|
|
10050
|
-
return (0, import_rxjs2.from)(processEvent(eventOrBatch));
|
|
10051
|
-
})
|
|
10052
|
-
)
|
|
10053
|
-
)
|
|
10054
|
-
).subscribe({
|
|
10055
|
-
error: (err) => logger.error("Pipeline error", { error: err instanceof Error ? err.message : String(err) })
|
|
10056
|
-
});
|
|
10057
|
-
actorStateUnit.events$.subscribe((event) => {
|
|
10058
|
-
logger.debug("Bus event received", { type: event.type, resourceId: event.resourceId });
|
|
10059
|
-
eventSubject.next(event);
|
|
10060
|
-
});
|
|
10351
|
+
const contentTransport = new HttpContentTransport(httpTransport);
|
|
10352
|
+
logger.info("Content transport ready", { mode: "http" });
|
|
10353
|
+
const smelter = new Smelter(
|
|
10354
|
+
actorStateUnit.events$,
|
|
10355
|
+
vectorStore,
|
|
10356
|
+
embeddingProvider,
|
|
10357
|
+
contentTransport,
|
|
10358
|
+
httpTransport,
|
|
10359
|
+
chunkingConfig,
|
|
10360
|
+
{ burstWindowMs: 50, maxBatchSize: 100, idleTimeoutMs: 200 },
|
|
10361
|
+
logger
|
|
10362
|
+
);
|
|
10363
|
+
smelter.initialize();
|
|
10061
10364
|
actorStateUnit.start();
|
|
10062
10365
|
logger.info("Subscribed to domain events");
|
|
10063
10366
|
const health = createServer((req, res) => {
|
|
10064
10367
|
if (req.url === "/health") {
|
|
10065
10368
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
10066
|
-
res.end(JSON.stringify({
|
|
10369
|
+
res.end(JSON.stringify({
|
|
10370
|
+
status: "ok",
|
|
10371
|
+
eventsProcessed: smelter.eventsProcessed,
|
|
10372
|
+
reconcile: smelter.reconcileState
|
|
10373
|
+
}));
|
|
10067
10374
|
} else {
|
|
10068
10375
|
res.writeHead(404);
|
|
10069
10376
|
res.end();
|
|
@@ -10074,15 +10381,16 @@ async function main() {
|
|
|
10074
10381
|
});
|
|
10075
10382
|
const shutdown = () => {
|
|
10076
10383
|
logger.info("Shutting down");
|
|
10384
|
+
clearInterval(reauthTimer);
|
|
10077
10385
|
actorStateUnit.dispose();
|
|
10078
10386
|
httpTransport.dispose();
|
|
10079
|
-
|
|
10080
|
-
eventSubject.complete();
|
|
10387
|
+
smelter.stop();
|
|
10081
10388
|
health.close();
|
|
10082
10389
|
process.exit(0);
|
|
10083
10390
|
};
|
|
10084
10391
|
process.on("SIGTERM", shutdown);
|
|
10085
10392
|
process.on("SIGINT", shutdown);
|
|
10393
|
+
await smelter.reconcile();
|
|
10086
10394
|
}
|
|
10087
10395
|
main().catch((error) => {
|
|
10088
10396
|
logger.error("Fatal", { error: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : void 0 });
|