@semiont/make-meaning 0.5.6 → 0.5.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -32
- package/dist/index.d.ts +198 -23
- package/dist/index.js +751 -223
- package/dist/index.js.map +1 -1
- package/dist/smelter-main.js +547 -243
- package/dist/smelter-main.js.map +1 -1
- package/package.json +2 -2
package/dist/smelter-main.js
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
1
|
+
import { createTomlConfigLoader, accessToken, baseUrl as baseUrl$1, burstBuffer, errField, resourceId, textExtractionOf, decodeRepresentation, getTargetSelector, getExactText, annotationId, getPrimaryMediaType, getPrimaryRepresentation } from '@semiont/core';
|
|
2
|
+
import { calculateChecksum } from '@semiont/content';
|
|
3
3
|
import { createEmbeddingProvider, createVectorStore, chunkText } from '@semiont/vectors';
|
|
4
|
+
import { registerVectorIndexSizeProvider, withActorSpan } from '@semiont/observability';
|
|
5
|
+
import { busRequest } from '@semiont/sdk';
|
|
6
|
+
import { HttpTransport, HttpContentTransport } from '@semiont/http-transport';
|
|
4
7
|
import { createServer } from 'http';
|
|
5
8
|
import { existsSync, readFileSync } from 'fs';
|
|
6
9
|
import { homedir } from 'os';
|
|
@@ -699,19 +702,19 @@ var require_Observable = __commonJS({
|
|
|
699
702
|
var config_1 = require_config();
|
|
700
703
|
var isFunction_1 = require_isFunction();
|
|
701
704
|
var errorContext_1 = require_errorContext();
|
|
702
|
-
var
|
|
703
|
-
function
|
|
705
|
+
var Observable3 = (function() {
|
|
706
|
+
function Observable4(subscribe) {
|
|
704
707
|
if (subscribe) {
|
|
705
708
|
this._subscribe = subscribe;
|
|
706
709
|
}
|
|
707
710
|
}
|
|
708
|
-
|
|
709
|
-
var observable = new
|
|
711
|
+
Observable4.prototype.lift = function(operator) {
|
|
712
|
+
var observable = new Observable4();
|
|
710
713
|
observable.source = this;
|
|
711
714
|
observable.operator = operator;
|
|
712
715
|
return observable;
|
|
713
716
|
};
|
|
714
|
-
|
|
717
|
+
Observable4.prototype.subscribe = function(observerOrNext, error, complete) {
|
|
715
718
|
var _this = this;
|
|
716
719
|
var subscriber = isSubscriber(observerOrNext) ? observerOrNext : new Subscriber_1.SafeSubscriber(observerOrNext, error, complete);
|
|
717
720
|
errorContext_1.errorContext(function() {
|
|
@@ -720,14 +723,14 @@ var require_Observable = __commonJS({
|
|
|
720
723
|
});
|
|
721
724
|
return subscriber;
|
|
722
725
|
};
|
|
723
|
-
|
|
726
|
+
Observable4.prototype._trySubscribe = function(sink) {
|
|
724
727
|
try {
|
|
725
728
|
return this._subscribe(sink);
|
|
726
729
|
} catch (err) {
|
|
727
730
|
sink.error(err);
|
|
728
731
|
}
|
|
729
732
|
};
|
|
730
|
-
|
|
733
|
+
Observable4.prototype.forEach = function(next, promiseCtor) {
|
|
731
734
|
var _this = this;
|
|
732
735
|
promiseCtor = getPromiseCtor(promiseCtor);
|
|
733
736
|
return new promiseCtor(function(resolve, reject) {
|
|
@@ -746,21 +749,21 @@ var require_Observable = __commonJS({
|
|
|
746
749
|
_this.subscribe(subscriber);
|
|
747
750
|
});
|
|
748
751
|
};
|
|
749
|
-
|
|
752
|
+
Observable4.prototype._subscribe = function(subscriber) {
|
|
750
753
|
var _a;
|
|
751
754
|
return (_a = this.source) === null || _a === void 0 ? void 0 : _a.subscribe(subscriber);
|
|
752
755
|
};
|
|
753
|
-
|
|
756
|
+
Observable4.prototype[observable_1.observable] = function() {
|
|
754
757
|
return this;
|
|
755
758
|
};
|
|
756
|
-
|
|
759
|
+
Observable4.prototype.pipe = function() {
|
|
757
760
|
var operations = [];
|
|
758
761
|
for (var _i = 0; _i < arguments.length; _i++) {
|
|
759
762
|
operations[_i] = arguments[_i];
|
|
760
763
|
}
|
|
761
764
|
return pipe_1.pipeFromArray(operations)(this);
|
|
762
765
|
};
|
|
763
|
-
|
|
766
|
+
Observable4.prototype.toPromise = function(promiseCtor) {
|
|
764
767
|
var _this = this;
|
|
765
768
|
promiseCtor = getPromiseCtor(promiseCtor);
|
|
766
769
|
return new promiseCtor(function(resolve, reject) {
|
|
@@ -774,12 +777,12 @@ var require_Observable = __commonJS({
|
|
|
774
777
|
});
|
|
775
778
|
});
|
|
776
779
|
};
|
|
777
|
-
|
|
778
|
-
return new
|
|
780
|
+
Observable4.create = function(subscribe) {
|
|
781
|
+
return new Observable4(subscribe);
|
|
779
782
|
};
|
|
780
|
-
return
|
|
783
|
+
return Observable4;
|
|
781
784
|
})();
|
|
782
|
-
exports.Observable =
|
|
785
|
+
exports.Observable = Observable3;
|
|
783
786
|
function getPromiseCtor(promiseCtor) {
|
|
784
787
|
var _a;
|
|
785
788
|
return (_a = promiseCtor !== null && promiseCtor !== void 0 ? promiseCtor : config_1.config.Promise) !== null && _a !== void 0 ? _a : Promise;
|
|
@@ -9716,8 +9719,7 @@ var require_operators = __commonJS({
|
|
|
9716
9719
|
});
|
|
9717
9720
|
|
|
9718
9721
|
// src/smelter-main.ts
|
|
9719
|
-
var
|
|
9720
|
-
var import_operators2 = __toESM(require_operators());
|
|
9722
|
+
var import_rxjs3 = __toESM(require_cjs());
|
|
9721
9723
|
|
|
9722
9724
|
// src/smelter-actor-state-unit.ts
|
|
9723
9725
|
var import_rxjs = __toESM(require_cjs());
|
|
@@ -9746,7 +9748,6 @@ function createSmelterActorStateUnit(options) {
|
|
|
9746
9748
|
);
|
|
9747
9749
|
return {
|
|
9748
9750
|
events$,
|
|
9749
|
-
emit: (channel, payload) => bus.emit(channel, payload),
|
|
9750
9751
|
start: () => {
|
|
9751
9752
|
if (started) return;
|
|
9752
9753
|
started = true;
|
|
@@ -9758,8 +9759,493 @@ function createSmelterActorStateUnit(options) {
|
|
|
9758
9759
|
};
|
|
9759
9760
|
}
|
|
9760
9761
|
|
|
9761
|
-
// src/smelter
|
|
9762
|
-
var
|
|
9762
|
+
// src/smelter.ts
|
|
9763
|
+
var import_rxjs2 = __toESM(require_cjs());
|
|
9764
|
+
var import_operators2 = __toESM(require_operators());
|
|
9765
|
+
|
|
9766
|
+
// src/batch-utils.ts
|
|
9767
|
+
/**
 * Split an ordered event list into maximal runs of consecutive events that
 * share the same `type`. Order is preserved both across and within runs;
 * an empty input yields an empty list.
 *
 * @param {Iterable<{type: string}>} events - Events in arrival order.
 * @returns {Array<Array<{type: string}>>} Consecutive same-type runs.
 */
function partitionByType(events) {
  const grouped = [];
  for (const item of events) {
    const tail = grouped.length > 0 ? grouped[grouped.length - 1] : null;
    // A run is keyed by the type of its first element.
    if (tail !== null && tail[0].type === item.type) {
      tail.push(item);
    } else {
      grouped.push([item]);
    }
  }
  return grouped;
}
|
|
9780
|
+
|
|
9781
|
+
// src/smelter.ts
|
|
9782
|
+
/**
 * Tell reconcile work items apart from live bus events: work items are the
 * inputs whose `type` starts with `"smelt:"`.
 *
 * Inputs without a string `type` (including `null`/`undefined`) are reported
 * as "not a work item" instead of throwing, so a malformed event cannot raise
 * a TypeError from this check.
 *
 * @param {{type?: unknown} | null | undefined} input - Pipeline input.
 * @returns {boolean} True only for `smelt:*` inputs.
 */
function isWorkItem(input) {
  const type = input?.type;
  return typeof type === "string" && type.startsWith("smelt:");
}
|
|
9785
|
+
/**
 * Keeps the vector store in step with the catalog.
 *
 * Live bus events and reconcile work items (`smelt:*`) are pushed through one
 * internal subject, grouped per resource id, buffered, and processed one
 * input (or one batch) at a time per group. `reconcile()` plans the
 * difference between what is indexed and what should be, then drains that
 * plan through the same pipeline in bounded waves.
 */
var Smelter = class _Smelter {
  /**
   * @param events$ Source of live events; subscribed to in `initialize()`.
   * @param vectorStore Store receiving vector upserts and deletes.
   * @param embeddingProvider Provider exposing `embed` and `embedBatch`.
   * @param content Content transport exposing `getBinary(resourceId)`.
   * @param bus Bus handed to `busRequest` for the `browse:*` requests.
   * @param chunkingConfig2 Configuration forwarded to `chunkText`.
   * @param timing `{ burstWindowMs, maxBatchSize, idleTimeoutMs }` forwarded to `burstBuffer`.
   * @param logger2 Logger exposing `debug`, `info`, `warn` and `error`.
   */
  constructor(events$, vectorStore, embeddingProvider, content, bus, chunkingConfig2, timing, logger2) {
    this.events$ = events$;
    this.vectorStore = vectorStore;
    this.embeddingProvider = embeddingProvider;
    this.content = content;
    this.bus = bus;
    this.chunkingConfig = chunkingConfig2;
    this.timing = timing;
    this.logger = logger2;
  }
  events$;
  vectorStore;
  embeddingProvider;
  content;
  bus;
  chunkingConfig;
  timing;
  logger;
  /** Page size requested from `browse:resources-requested` while listing the catalog. */
  static RECONCILE_PAGE_SIZE = 200;
  /** Bound on concurrently in-flight reconcile work — a cold rebuild must not fan out unbounded embedding calls. */
  static RECONCILE_WAVE = 8;
  /** Mailbox shared by live events and reconcile work items. */
  eventSubject = new import_rxjs2.Subject();
  sourceSubscription = null;
  pipelineSubscription = null;
  _eventsProcessed = 0;
  _reconcileState = { phase: "pending" };
  /** Running count of consumed work items (success or failure). */
  workDone = 0;
  /** `{ target, resolve, reject }` for the wave `drain()` is currently awaiting, or null. */
  workWaiter = null;
  /** Promise of the reconcile currently running, or null; lets overlapping calls share one run. */
  reconcileInFlight = null;
  /** Number of live (non-`smelt:*`) events processed without error. */
  get eventsProcessed() {
    return this._eventsProcessed;
  }
  /** Latest reconcile state: `pending`, `running`, `done` (with summary) or `failed` (with error). */
  get reconcileState() {
    return this._reconcileState;
  }
  /**
   * Build the processing pipeline and attach it to the live event source.
   *
   * Calling this again while a pipeline is attached is ignored: a second
   * call would overwrite (and therefore leak) the first subscriptions and
   * process every event twice.
   */
  initialize() {
    if (this.pipelineSubscription) {
      this.logger.warn("Smelter.initialize() called while already initialized; ignoring");
      return;
    }
    this.pipelineSubscription = this.eventSubject.pipe(
      // One lane per resource; inputs without a resource id share a single lane.
      (0, import_operators2.groupBy)((e) => e.resourceId ?? "__unknown__"),
      (0, import_operators2.mergeMap)(
        (group) => group.pipe(
          burstBuffer({
            burstWindowMs: this.timing.burstWindowMs,
            maxBatchSize: this.timing.maxBatchSize,
            idleTimeoutMs: this.timing.idleTimeoutMs
          }),
          // concatMap: the next input of a lane starts only after the previous one settles.
          (0, import_operators2.concatMap)((inputOrBatch) => {
            if (Array.isArray(inputOrBatch)) {
              return (0, import_rxjs2.from)(
                withActorSpan("smelter", "batch", async () => {
                  this._eventsProcessed += await this.processBatch(inputOrBatch);
                }, { "batch.size": inputOrBatch.length })
              );
            }
            return (0, import_rxjs2.from)(
              withActorSpan("smelter", inputOrBatch.type, async () => {
                const ok = await this.safeProcessEvent(inputOrBatch);
                // Work items tick the drain counter whether or not they succeeded.
                if (isWorkItem(inputOrBatch)) this.noteWorkDone(1);
                else if (ok) this._eventsProcessed++;
              })
            );
          })
        )
      )
    ).subscribe({
      error: (err) => {
        this.logger.error("Smelter pipeline error", { error: errField(err) });
        // Nothing will consume queued work any more, so fail the waiting drain
        // instead of leaving reconcile() pending forever.
        this.abortPendingDrain(new Error("Smelter pipeline failed before reconcile work drained", { cause: err }));
      }
    });
    this.sourceSubscription = this.events$.subscribe((event) => {
      this.logger.debug("Bus event received", { type: event.type, resourceId: event.resourceId });
      this.eventSubject.next(event);
    });
    this.logger.info("Smelter pipeline initialized");
  }
  /**
   * Detach from the event source, tear down the pipeline and complete the
   * mailbox. A reconcile drain that is still waiting is rejected so that
   * `reconcile()` settles (state `failed`) instead of hanging.
   */
  stop() {
    this.sourceSubscription?.unsubscribe();
    this.sourceSubscription = null;
    this.pipelineSubscription?.unsubscribe();
    this.pipelineSubscription = null;
    this.eventSubject.complete();
    this.abortPendingDrain(new Error("Smelter stopped before reconcile work drained"));
    this.logger.info("Smelter stopped");
  }
  /** Reject and clear the wave waiter, if one is pending. */
  abortPendingDrain(error) {
    const waiter = this.workWaiter;
    if (!waiter) return;
    this.workWaiter = null;
    waiter.reject(error);
  }
  /**
   * Record `count` consumed work items and release the wave waiter once its
   * target has been reached.
   */
  noteWorkDone(count) {
    this.workDone += count;
    if (this.workWaiter && this.workDone >= this.workWaiter.target) {
      this.workWaiter.resolve();
      this.workWaiter = null;
    }
  }
  /**
   * Throw unless the provider returned exactly one embedding per input.
   * Indexing a short result would store `undefined` embeddings and, in the
   * batched path, shift vectors onto the wrong resource.
   */
  assertEmbeddingCount(embeddings, expected, context) {
    const received = embeddings?.length;
    if (received !== expected) {
      throw new Error(`Embedding provider returned ${received} embeddings for ${expected} inputs (${context})`);
    }
  }
  /**
   * Returns the number of WIRE events processed without error (the S9b
   * oracle) — `smelt:*` work-item runs tick the drain counter instead.
   */
  async processBatch(events) {
    let wireProcessed = 0;
    for (const run of partitionByType(events)) {
      const workRun = isWorkItem(run[0]);
      try {
        if (run.length === 1) {
          const ok = await this.safeProcessEvent(run[0]);
          if (ok && !workRun) wireProcessed++;
        } else {
          const processed = await this.applyBatchByType(run);
          if (!workRun) wireProcessed += processed;
        }
      } catch (error) {
        this.logger.error("Smelter failed to process batch run", {
          eventType: run[0].type,
          runSize: run.length,
          error: errField(error)
        });
      } finally {
        // Count work items even when the run failed, so drain() is never left waiting on them.
        if (workRun) this.noteWorkDone(run.length);
      }
    }
    return wireProcessed;
  }
  /**
   * Batch-optimized processing for consecutive events of the same type.
   * Returns the number of events processed without error.
   */
  async applyBatchByType(events) {
    switch (events[0].type) {
      case "yield:created":
      case "smelt:embed":
        return this.batchResourceCreated(events);
      case "mark:added":
      case "smelt:embed-annotation":
        return this.batchAnnotationAdded(events);
      default: {
        // No batched handler for this type: process the run one event at a time.
        let processed = 0;
        for (const event of events) {
          if (await this.safeProcessEvent(event)) processed++;
        }
        return processed;
      }
    }
  }
  /** Returns true if the input was processed without error; failures are logged, never thrown. */
  async safeProcessEvent(event) {
    try {
      await this.processEvent(event);
      return true;
    } catch (err) {
      this.logger.error("Smelter failed to process event", {
        type: event.type,
        resourceId: event.resourceId,
        error: errField(err)
      });
      return false;
    }
  }
  /** Dispatch one input to its handler by `type`; unrecognized types are ignored. */
  async processEvent(event) {
    switch (event.type) {
      case "yield:created":
        await this.embedResource(event, "Indexed resource");
        break;
      case "yield:updated":
      case "yield:representation-added":
        await this.embedResource(event, "Re-embedded resource");
        break;
      case "mark:archived":
        await this.handleResourceArchived(event);
        break;
      case "mark:added":
        await this.handleAnnotationAdded(event);
        break;
      case "mark:removed":
        await this.handleAnnotationRemoved(event);
        break;
      // Reconcile work items — same handlers, distinct provenance.
      case "smelt:embed":
        await this.embedResource(event, "Reconcile-indexed resource");
        break;
      case "smelt:purge":
        await this.handleResourcePurge(event);
        break;
      case "smelt:embed-annotation":
        await this.handleAnnotationAdded(event);
        break;
      case "smelt:purge-annotation":
        await this.handleAnnotationRemoved(event);
        break;
    }
  }
  /** Delete the resource vectors of a resource that reconcile found indexed but not embeddable. */
  async handleResourcePurge(event) {
    const rid = event.resourceId;
    if (!rid) return;
    await this.vectorStore.deleteResourceVectors(resourceId(rid));
    this.logger.info("Reconcile deleted orphan resource vectors", { resourceId: rid });
  }
  /**
   * Resolve a resource's embeddable text: bytes via the content transport,
   * gated to media types that decode as text, decoded charset-aware. The
   * checksum is over the raw bytes actually read — stamped onto the vectors
   * so reconciliation can compare against the catalog's claim (S12). Returns
   * null (logged) when the resource doesn't decode as text, is unavailable,
   * or is empty — callers skip it.
   */
  async fetchEmbeddableText(resourceId$1) {
    try {
      const { data, contentType } = await this.content.getBinary(resourceId(resourceId$1));
      if (textExtractionOf(contentType) !== "decode") {
        this.logger.debug("Skipping resource that does not decode as text", { resourceId: resourceId$1, contentType });
        return null;
      }
      const bytes = Buffer.from(data);
      const text = decodeRepresentation(bytes, contentType);
      return text.trim() ? { text, checksum: calculateChecksum(bytes) } : null;
    } catch (error) {
      this.logger.warn("Content unavailable for embedding", { resourceId: resourceId$1, error: errField(error) });
      return null;
    }
  }
  /**
   * Fetch, chunk, embed and upsert one resource. Skips silently when the
   * event has no resource id, when there is no embeddable text, or when
   * chunking yields nothing.
   *
   * @param event Input carrying `resourceId`.
   * @param logMessage Message logged after a successful upsert.
   * @throws {Error} When the provider returns a different number of embeddings than chunks.
   */
  async embedResource(event, logMessage) {
    const rid = event.resourceId;
    if (!rid) return;
    const fetched = await this.fetchEmbeddableText(rid);
    if (!fetched) return;
    const chunks = chunkText(fetched.text, this.chunkingConfig);
    if (chunks.length === 0) return;
    const embeddings = await this.embeddingProvider.embedBatch(chunks);
    this.assertEmbeddingCount(embeddings, chunks.length, `resource ${rid}`);
    const embeddingChunks = chunks.map((t, i) => ({
      chunkIndex: i,
      text: t,
      embedding: embeddings[i]
    }));
    await this.vectorStore.upsertResourceVectors(resourceId(rid), embeddingChunks, fetched.checksum);
    this.logger.info(logMessage, { resourceId: rid, chunks: chunks.length });
  }
  /** Remove both the resource vectors and the annotation vectors of an archived resource. */
  async handleResourceArchived(event) {
    const rid = event.resourceId;
    if (!rid) return;
    await this.vectorStore.deleteResourceVectors(resourceId(rid));
    await this.vectorStore.deleteAnnotationVectorsForResource(resourceId(rid));
    this.logger.info("Deleted vectors for archived resource", { resourceId: rid });
  }
  /**
   * Embed the exact text selected by an annotation and upsert its vector.
   * Skips annotations without an id, without a resource id, or whose
   * selector has no non-blank exact text.
   */
  async handleAnnotationAdded(event) {
    const annotation = event.payload.annotation;
    if (!annotation?.id) return;
    const rid = event.resourceId;
    if (!rid) return;
    const selector = getTargetSelector(annotation.target);
    const exactText = getExactText(selector);
    if (!exactText?.trim()) return;
    const aid = annotationId(annotation.id);
    const embedding2 = await this.embeddingProvider.embed(exactText);
    const payload = {
      annotationId: aid,
      resourceId: resourceId(rid),
      motivation: annotation.motivation ?? "",
      entityTypes: annotation.entityTypes ?? [],
      exactText
    };
    await this.vectorStore.upsertAnnotationVector(aid, embedding2, payload);
    this.logger.info("Indexed annotation", { annotationId: String(aid) });
  }
  /** Delete the vector of the annotation named by `event.payload.annotationId`. */
  async handleAnnotationRemoved(event) {
    const annotationId$1 = event.payload.annotationId;
    if (!annotationId$1) return;
    const aid = annotationId(annotationId$1);
    await this.vectorStore.deleteAnnotationVector(aid);
    this.logger.info("Deleted annotation vector", { annotationId: annotationId$1 });
  }
  /**
   * Batch-embed chunks from multiple yield:created events in a single
   * embedBatch() call, then index per resource.
   *
   * @returns {Promise<number>} `events.length` — skipped events count as processed.
   * @throws {Error} When the provider returns a different number of embeddings than chunks.
   */
  async batchResourceCreated(events) {
    const resourceData = [];
    const allChunks = [];
    for (const event of events) {
      const rid = event.resourceId;
      if (!rid) continue;
      const fetched = await this.fetchEmbeddableText(rid);
      if (!fetched) continue;
      const chunks = chunkText(fetched.text, this.chunkingConfig);
      if (chunks.length === 0) continue;
      resourceData.push({ rid: resourceId(rid), chunks, checksum: fetched.checksum });
      // Append in a loop: spreading a very large chunk list into push() can exceed the argument limit.
      for (const chunk of chunks) allChunks.push(chunk);
    }
    if (allChunks.length === 0) return events.length;
    const allEmbeddings = await this.embeddingProvider.embedBatch(allChunks);
    // Embeddings are sliced per resource by offset, so a short result would misassign vectors.
    this.assertEmbeddingCount(allEmbeddings, allChunks.length, `batch of ${resourceData.length} resources`);
    let offset = 0;
    for (const { rid, chunks, checksum } of resourceData) {
      const embeddingChunks = chunks.map((t, i) => ({
        chunkIndex: i,
        text: t,
        embedding: allEmbeddings[offset + i]
      }));
      await this.vectorStore.upsertResourceVectors(rid, embeddingChunks, checksum);
      this.logger.info("Batch-indexed resource", { resourceId: String(rid), chunks: chunks.length });
      offset += chunks.length;
    }
    return events.length;
  }
  /**
   * Batch-embed exact texts from multiple mark:added events in a single
   * embedBatch() call, then index per annotation.
   *
   * @returns {Promise<number>} `events.length` — skipped events count as processed.
   * @throws {Error} When the provider returns a different number of embeddings than texts.
   */
  async batchAnnotationAdded(events) {
    const annotationData = [];
    for (const event of events) {
      const annotation = event.payload.annotation;
      if (!annotation?.id) continue;
      const rid = event.resourceId;
      if (!rid) continue;
      const selector = getTargetSelector(annotation.target);
      const exactText = getExactText(selector);
      if (!exactText?.trim()) continue;
      annotationData.push({
        rid: resourceId(rid),
        aid: annotationId(annotation.id),
        exactText,
        motivation: annotation.motivation ?? "",
        entityTypes: annotation.entityTypes ?? []
      });
    }
    if (annotationData.length === 0) return events.length;
    const allEmbeddings = await this.embeddingProvider.embedBatch(
      annotationData.map((a) => a.exactText)
    );
    this.assertEmbeddingCount(allEmbeddings, annotationData.length, `batch of ${annotationData.length} annotations`);
    for (let i = 0; i < annotationData.length; i++) {
      const { rid, aid, exactText, motivation, entityTypes } = annotationData[i];
      const payload = {
        annotationId: aid,
        resourceId: rid,
        motivation,
        entityTypes,
        exactText
      };
      await this.vectorStore.upsertAnnotationVector(aid, allEmbeddings[i], payload);
      this.logger.info("Batch-indexed annotation", { annotationId: String(aid) });
    }
    return events.length;
  }
  // ── Reconciliation ───────────────────────────────────────────────────
  /**
   * Reconcile the vector store against the KS catalog.
   *
   * Lists what IS indexed (via the store's id enumeration) and what SHOULD
   * be (non-archived resources with embeddable media types, plus their
   * exact-text annotations, via the `browse:*` RPC channels), then plans the
   * diff as `smelt:*` work items — embeds for what's missing, purges for
   * what shouldn't be there — and drains them through the pipeline mailbox.
   * Work items share the per-resource lanes with live events, so a reconcile
   * re-embed can never interleave with (or stale-overwrite) live processing
   * of the same resource (axioms S1/S2). Waves of RECONCILE_WAVE bound how
   * many embedding calls a cold rebuild has in flight.
   *
   * Call after the live subscription is attached so nothing falls in the
   * gap. The index snapshot is taken BEFORE the catalog listing so a
   * resource indexed by a live event mid-reconcile is never mistaken for an
   * orphan; convergence holds because every upsert replaces a resource's
   * full vector set from current content.
   *
   * A call made while a reconcile is already running joins that run rather
   * than starting a second one: the drain tracks a single wave waiter, so
   * two concurrent drains would overwrite each other's waiter.
   *
   * @returns Summary counts of the planned work, per work-item type.
   * @throws {Error} When `initialize()` has not been called, or when listing or draining fails.
   */
  async reconcile() {
    if (!this.pipelineSubscription) {
      throw new Error("Smelter.reconcile() requires initialize() \u2014 work items drain through the pipeline");
    }
    if (!this.reconcileInFlight) {
      const run = this.runReconcile();
      this.reconcileInFlight = run;
      const clear = () => {
        if (this.reconcileInFlight === run) this.reconcileInFlight = null;
      };
      // Clear on either outcome; the rejection itself still reaches callers through `run`.
      run.then(clear, clear);
    }
    return this.reconcileInFlight;
  }
  /** One reconcile pass: snapshot the index, list the catalog, plan the diff, drain it. */
  async runReconcile() {
    this._reconcileState = { phase: "running" };
    try {
      const [indexedResources, indexedAnnotations] = await Promise.all([
        this.vectorStore.listResourceChecksums(),
        this.vectorStore.listAnnotationIds()
      ]);
      const resources = await this.listAllResources();
      this.logger.info("Reconcile started", {
        indexedResources: indexedResources.size,
        indexedAnnotations: indexedAnnotations.size,
        liveResources: resources.length
      });
      // Resource id -> catalog checksum (possibly undefined) for every resource that decodes as text.
      const embeddable = /* @__PURE__ */ new Map();
      for (const resource of resources) {
        const mediaType = getPrimaryMediaType(resource);
        if (resource["@id"] && mediaType && textExtractionOf(mediaType) === "decode") {
          embeddable.set(resource["@id"], getPrimaryRepresentation(resource)?.checksum);
        }
      }
      const work = [];
      // Indexed but no longer embeddable: purge.
      for (const rid of indexedResources.keys()) {
        if (!embeddable.has(rid)) work.push({ type: "smelt:purge", resourceId: rid, payload: {} });
      }
      // Missing from the index, or indexed under a checksum the catalog disagrees with: embed.
      for (const [rid, catalogChecksum] of embeddable) {
        if (!indexedResources.has(rid)) {
          work.push({ type: "smelt:embed", resourceId: rid, payload: {} });
        } else if (catalogChecksum !== void 0 && indexedResources.get(rid) !== catalogChecksum) {
          work.push({ type: "smelt:embed", resourceId: rid, payload: {} });
        }
      }
      const liveAnnotationIds = /* @__PURE__ */ new Set();
      for (const resource of resources) {
        const rid = resource["@id"];
        if (!rid) continue;
        const { annotations } = await busRequest(
          this.bus,
          "browse:annotations-requested",
          { resourceId: rid },
          "browse:annotations-result",
          "browse:annotations-failed"
        );
        for (const annotation of annotations) {
          const exactText = getExactText(getTargetSelector(annotation.target));
          if (!annotation.id || !exactText?.trim()) continue;
          liveAnnotationIds.add(annotation.id);
          if (!indexedAnnotations.has(annotation.id)) {
            work.push({ type: "smelt:embed-annotation", resourceId: rid, payload: { annotation } });
          }
        }
      }
      for (const aid of indexedAnnotations) {
        if (!liveAnnotationIds.has(aid)) {
          // The annotation id doubles as the lane key for its purge.
          work.push({ type: "smelt:purge-annotation", resourceId: aid, payload: { annotationId: aid } });
        }
      }
      await this.drain(work);
      const planned = (type) => work.filter((w) => w.type === type).length;
      const summary = {
        resourcesEmbedded: planned("smelt:embed"),
        resourceVectorsDeleted: planned("smelt:purge"),
        annotationsEmbedded: planned("smelt:embed-annotation"),
        annotationVectorsDeleted: planned("smelt:purge-annotation")
      };
      this._reconcileState = { phase: "done", summary };
      this.logger.info("Reconcile complete", { ...summary });
      return summary;
    } catch (error) {
      this._reconcileState = {
        phase: "failed",
        error: error instanceof Error ? error.message : String(error)
      };
      this.logger.error("Reconcile failed", { error: errField(error) });
      throw error;
    }
  }
  /**
   * Enqueue planner work through the mailbox in bounded waves and await
   * completion. The pipeline ticks `noteWorkDone` for every consumed work
   * item (success or failure — failures are logged like any live event), so
   * each wave's waiter resolves exactly when its items have been processed.
   *
   * @throws {Error} When the pipeline is stopped or has failed before or while a wave is pending.
   */
  async drain(work) {
    for (let i = 0; i < work.length; i += _Smelter.RECONCILE_WAVE) {
      // Items pushed into a stopped or closed pipeline are never consumed; fail fast instead of waiting.
      if (!this.pipelineSubscription || this.pipelineSubscription.closed) {
        throw new Error("Smelter pipeline is not running \u2014 reconcile work cannot drain");
      }
      const wave = work.slice(i, i + _Smelter.RECONCILE_WAVE);
      const done = new Promise((resolve, reject) => {
        this.workWaiter = { target: this.workDone + wave.length, resolve, reject };
      });
      for (const item of wave) this.eventSubject.next(item);
      await done;
    }
  }
  /** Page through `browse:resources-requested` until the catalog is exhausted. */
  async listAllResources() {
    const all = [];
    for (; ; ) {
      const page = await busRequest(
        this.bus,
        "browse:resources-requested",
        { archived: false, offset: all.length, limit: _Smelter.RECONCILE_PAGE_SIZE },
        "browse:resources-result",
        "browse:resources-failed"
      );
      all.push(...page.resources);
      // An empty page also ends the loop, so an overstated `total` cannot spin forever.
      if (page.resources.length === 0 || all.length >= page.total) return all;
    }
  }
};
|
|
9763
10249
|
var configPath = join(homedir(), ".semiontconfig");
|
|
9764
10250
|
var tomlReader = {
|
|
9765
10251
|
readIfExists: (p) => existsSync(p) ? readFileSync(p, "utf-8") : null
|
|
@@ -9796,9 +10282,6 @@ var chunkingConfig = {
|
|
|
9796
10282
|
};
|
|
9797
10283
|
var workerSecret = process.env.SEMIONT_WORKER_SECRET ?? "";
|
|
9798
10284
|
var healthPort = 9091;
|
|
9799
|
-
var BURST_WINDOW_MS = 50;
|
|
9800
|
-
var MAX_BATCH_SIZE = 100;
|
|
9801
|
-
var IDLE_TIMEOUT_MS = 200;
|
|
9802
10285
|
var logger = createProcessLogger("smelter");
|
|
9803
10286
|
async function authenticate() {
|
|
9804
10287
|
if (!workerSecret) {
|
|
@@ -9820,250 +10303,70 @@ async function authenticate() {
|
|
|
9820
10303
|
const { token } = await response.json();
|
|
9821
10304
|
return token;
|
|
9822
10305
|
}
|
|
9823
|
-
var authToken = "";
|
|
9824
|
-
async function fetchContent(resourceId) {
|
|
9825
|
-
try {
|
|
9826
|
-
const response = await fetch(`${baseUrl}/api/resources/${resourceId}`, {
|
|
9827
|
-
headers: {
|
|
9828
|
-
Authorization: `Bearer ${authToken}`,
|
|
9829
|
-
Accept: "text/plain"
|
|
9830
|
-
}
|
|
9831
|
-
});
|
|
9832
|
-
if (!response.ok) return null;
|
|
9833
|
-
return response.text();
|
|
9834
|
-
} catch {
|
|
9835
|
-
return null;
|
|
9836
|
-
}
|
|
9837
|
-
}
|
|
9838
|
-
var vectorStore;
|
|
9839
|
-
var embeddingProvider;
|
|
9840
|
-
var eventsProcessed = 0;
|
|
9841
|
-
async function processEvent(event) {
|
|
9842
|
-
try {
|
|
9843
|
-
switch (event.type) {
|
|
9844
|
-
case "yield:created":
|
|
9845
|
-
await handleResourceCreated(event);
|
|
9846
|
-
break;
|
|
9847
|
-
case "yield:updated":
|
|
9848
|
-
case "yield:representation-added":
|
|
9849
|
-
await handleResourceReembed(event);
|
|
9850
|
-
break;
|
|
9851
|
-
case "mark:archived":
|
|
9852
|
-
await handleResourceArchived(event);
|
|
9853
|
-
break;
|
|
9854
|
-
case "mark:added":
|
|
9855
|
-
await handleAnnotationAdded(event);
|
|
9856
|
-
break;
|
|
9857
|
-
case "mark:removed":
|
|
9858
|
-
await handleAnnotationRemoved(event);
|
|
9859
|
-
break;
|
|
9860
|
-
}
|
|
9861
|
-
eventsProcessed++;
|
|
9862
|
-
} catch (err) {
|
|
9863
|
-
logger.error("Failed to process event", { type: event.type, resourceId: event.resourceId, error: err instanceof Error ? err.message : String(err) });
|
|
9864
|
-
}
|
|
9865
|
-
}
|
|
9866
|
-
async function handleResourceCreated(event) {
|
|
9867
|
-
const rid = event.resourceId;
|
|
9868
|
-
if (!rid) return;
|
|
9869
|
-
const text = await fetchContent(rid);
|
|
9870
|
-
if (!text?.trim()) return;
|
|
9871
|
-
const chunks = chunkText(text, chunkingConfig);
|
|
9872
|
-
if (chunks.length === 0) return;
|
|
9873
|
-
const embeddings = await embeddingProvider.embedBatch(chunks);
|
|
9874
|
-
const embeddingChunks = chunks.map((t, i) => ({
|
|
9875
|
-
chunkIndex: i,
|
|
9876
|
-
text: t,
|
|
9877
|
-
embedding: embeddings[i]
|
|
9878
|
-
}));
|
|
9879
|
-
await vectorStore.upsertResourceVectors(resourceId(rid), embeddingChunks);
|
|
9880
|
-
logger.info("Indexed resource", { resourceId: rid, chunks: chunks.length });
|
|
9881
|
-
}
|
|
9882
|
-
/**
 * Re-embed a resource whose content changed.
 *
 * Fetches the current content, chunks and embeds it, then deletes the
 * resource's stored vectors and upserts the freshly computed ones.
 *
 * The embeddings are computed before the delete. If the content is missing or
 * blank, or chunking yields nothing, the function returns before the delete,
 * so whatever vectors were stored previously are left as they are.
 *
 * @param {object} event - Event object; only `event.resourceId` is read here.
 * @returns {Promise<void>}
 */
async function handleResourceReembed(event) {
  const rid = event.resourceId;
  if (!rid) {
    return;
  }

  const content = await fetchContent(rid);
  const trimmed = content?.trim();
  if (!trimmed) {
    return;
  }

  const pieces = chunkText(content, chunkingConfig);
  if (pieces.length === 0) {
    return;
  }

  const vectors = await embeddingProvider.embedBatch(pieces);

  // Pair each chunk with the embedding at the same position.
  const records = [];
  for (let index = 0; index < pieces.length; index++) {
    records.push({
      chunkIndex: index,
      text: pieces[index],
      embedding: vectors[index]
    });
  }

  // Replace rather than merge: drop the old vectors, then write the new set.
  await vectorStore.deleteResourceVectors(resourceId(rid));
  await vectorStore.upsertResourceVectors(resourceId(rid), records);
  logger.info("Re-embedded resource", { resourceId: rid, chunks: pieces.length });
}
|
|
9899
|
-
/**
 * Remove the stored vectors of an archived resource.
 *
 * Does nothing when the event carries no `resourceId`.
 *
 * @param {object} event - Event object; only `event.resourceId` is read here.
 * @returns {Promise<void>}
 */
async function handleResourceArchived(event) {
  const { resourceId: rid } = event;
  if (rid) {
    await vectorStore.deleteResourceVectors(resourceId(rid));
    logger.info("Deleted vectors for archived resource", { resourceId: rid });
  }
}
|
|
9905
|
-
/**
 * Index a single added annotation.
 *
 * Embeds the exact text selected by the annotation's target and upserts it as
 * an annotation vector, with a payload describing the annotation.
 *
 * Returns without side effects when the annotation has no id, the event has
 * no `resourceId`, or the selected exact text is missing or blank.
 *
 * @param {object} event - Event object; reads `event.payload.annotation` and
 *   `event.resourceId`.
 * @returns {Promise<void>}
 */
async function handleAnnotationAdded(event) {
  const note = event.payload.annotation;
  if (!note?.id) {
    return;
  }

  const rid = event.resourceId;
  if (!rid) {
    return;
  }

  const exactText = getExactText(getTargetSelector(note.target));
  const trimmed = exactText?.trim();
  if (!trimmed) {
    return;
  }

  const aid = annotationId(note.id);
  const vector = await embeddingProvider.embed(exactText);

  await vectorStore.upsertAnnotationVector(aid, vector, {
    annotationId: aid,
    resourceId: resourceId(rid),
    motivation: note.motivation ?? "",
    entityTypes: note.entityTypes ?? [],
    exactText
  });
  logger.info("Indexed annotation", { annotationId: String(aid) });
}
|
|
9925
|
-
/**
 * Delete the stored vector of a removed annotation.
 *
 * Does nothing when the event payload carries no `annotationId`.
 *
 * @param {object} event - Event object; only `event.payload.annotationId` is
 *   read here.
 * @returns {Promise<void>}
 */
async function handleAnnotationRemoved(event) {
  const rawId = event.payload.annotationId;
  if (rawId) {
    await vectorStore.deleteAnnotationVector(annotationId(rawId));
    // The log carries the raw id from the payload, not the converted one.
    logger.info("Deleted annotation vector", { annotationId: rawId });
  }
}
|
|
9932
|
-
/**
 * Dispatch a batch of events to the matching handler.
 *
 * The type of the FIRST event selects the path for the whole batch:
 *  - "yield:created" -> `batchResourceCreated(events)`
 *  - "mark:added"    -> `batchAnnotationAdded(events)`
 *  - anything else   -> each event is passed to `processEvent` one at a time,
 *                       in order.
 *
 * An empty batch is a no-op. Without the guard, reading `events[0].type` on an
 * empty array throws a TypeError.
 *
 * NOTE(review): this presumes every event in a batch shares the first event's
 * type — confirm against the code that groups events into batches.
 *
 * @param {Array<object>} events - Events to process; each has a `type` field.
 * @returns {Promise<void>}
 */
async function processBatch(events) {
  if (events.length === 0) {
    return;
  }

  const [{ type }] = events;
  switch (type) {
    case "yield:created":
      await batchResourceCreated(events);
      break;
    case "mark:added":
      await batchAnnotationAdded(events);
      break;
    default:
      // No batch path for this type: process sequentially to keep ordering.
      for (const event of events) {
        await processEvent(event);
      }
  }
}
|
|
9944
|
-
/**
 * Index a batch of created resources with one embedding call.
 *
 * Phase 1 fetches and chunks each resource in order, skipping events that
 * have no `resourceId`, blank content, or no chunks. Phase 2 embeds the chunks
 * of all remaining resources in a single `embedBatch` call. Phase 3 hands each
 * resource its own run of embeddings and upserts them.
 *
 * If no resource yields any chunk, the function returns before embedding and
 * does not touch `eventsProcessed`. Otherwise `eventsProcessed` grows by the
 * full `events.length`, skipped events included.
 *
 * @param {Array<object>} events - Events; only `resourceId` is read from each.
 * @returns {Promise<void>}
 */
async function batchResourceCreated(events) {
  const prepared = [];
  const flatChunks = [];

  for (const evt of events) {
    const rawId = evt.resourceId;
    if (!rawId) {
      continue;
    }
    const content = await fetchContent(rawId);
    const trimmed = content?.trim();
    if (!trimmed) {
      continue;
    }
    const pieces = chunkText(content, chunkingConfig);
    if (pieces.length === 0) {
      continue;
    }
    // Record where this resource's chunks begin inside the flat list so its
    // embeddings can be located after the single batch call.
    prepared.push({ rid: resourceId(rawId), chunks: pieces, start: flatChunks.length });
    flatChunks.push(...pieces);
  }

  if (flatChunks.length === 0) {
    return;
  }

  const flatEmbeddings = await embeddingProvider.embedBatch(flatChunks);

  for (const { rid, chunks, start } of prepared) {
    const records = [];
    for (let index = 0; index < chunks.length; index++) {
      records.push({
        chunkIndex: index,
        text: chunks[index],
        embedding: flatEmbeddings[start + index]
      });
    }
    await vectorStore.upsertResourceVectors(rid, records);
    logger.info("Batch-indexed resource", { resourceId: String(rid), chunks: chunks.length });
  }

  eventsProcessed += events.length;
}
|
|
9972
|
-
/**
 * Index a batch of added annotations with one embedding call.
 *
 * Collects every annotation that has an id, a `resourceId` on its event and a
 * non-blank exact text, then embeds all exact texts in a single `embedBatch`
 * call and upserts one annotation vector per collected entry, in order.
 *
 * If nothing qualifies, the function returns before embedding and does not
 * touch `eventsProcessed`. Otherwise `eventsProcessed` grows by the full
 * `events.length`, skipped events included.
 *
 * @param {Array<object>} events - Events; reads `payload.annotation` and
 *   `resourceId` from each.
 * @returns {Promise<void>}
 */
async function batchAnnotationAdded(events) {
  const pending = [];

  for (const evt of events) {
    const note = evt.payload.annotation;
    if (!note?.id) {
      continue;
    }
    const rawResourceId = evt.resourceId;
    if (!rawResourceId) {
      continue;
    }
    const exactText = getExactText(getTargetSelector(note.target));
    const trimmed = exactText?.trim();
    if (!trimmed) {
      continue;
    }
    pending.push({
      rid: resourceId(rawResourceId),
      aid: annotationId(note.id),
      exactText,
      motivation: note.motivation ?? "",
      entityTypes: note.entityTypes ?? []
    });
  }

  if (pending.length === 0) {
    return;
  }

  const texts = pending.map((entry) => entry.exactText);
  const vectors = await embeddingProvider.embedBatch(texts);

  // Entry N is paired with the embedding at position N.
  for (const [position, entry] of pending.entries()) {
    const { rid, aid, exactText, motivation, entityTypes } = entry;
    await vectorStore.upsertAnnotationVector(aid, vectors[position], {
      annotationId: aid,
      resourceId: rid,
      motivation,
      entityTypes,
      exactText
    });
    logger.info("Batch-indexed annotation", { annotationId: String(aid) });
  }

  eventsProcessed += events.length;
}
|
|
10008
10306
|
async function main() {
|
|
10009
10307
|
const { initObservabilityNode } = await import('@semiont/observability/node');
|
|
10010
10308
|
initObservabilityNode({ serviceName: "semiont-smelter" });
|
|
10011
10309
|
logger.info("Authenticating", { baseUrl });
|
|
10012
|
-
|
|
10310
|
+
const tokenSubject = new import_rxjs3.BehaviorSubject(accessToken(await authenticate()));
|
|
10013
10311
|
logger.info("Authenticated");
|
|
10014
|
-
|
|
10312
|
+
const refreshToken = async () => {
|
|
10313
|
+
const token = await authenticate();
|
|
10314
|
+
tokenSubject.next(accessToken(token));
|
|
10315
|
+
return token;
|
|
10316
|
+
};
|
|
10317
|
+
const reauthTimer = setInterval(() => {
|
|
10318
|
+
refreshToken().catch((error) => {
|
|
10319
|
+
logger.error("Proactive re-authentication failed", {
|
|
10320
|
+
error: error instanceof Error ? error.message : String(error)
|
|
10321
|
+
});
|
|
10322
|
+
});
|
|
10323
|
+
}, 12 * 60 * 60 * 1e3);
|
|
10324
|
+
const embeddingProvider = await createEmbeddingProvider({
|
|
10015
10325
|
type: embeddingType,
|
|
10016
10326
|
model: embeddingModel,
|
|
10017
10327
|
baseURL: embeddingBaseURL
|
|
10018
10328
|
});
|
|
10019
10329
|
logger.info("Embedding provider ready", { type: embeddingType, model: embeddingModel });
|
|
10020
10330
|
const dimensions = embeddingProvider.dimensions();
|
|
10021
|
-
vectorStore = await createVectorStore({
|
|
10331
|
+
const vectorStore = await createVectorStore({
|
|
10022
10332
|
type: "qdrant",
|
|
10023
10333
|
host: qdrantHost,
|
|
10024
10334
|
port: qdrantPort,
|
|
10025
10335
|
dimensions
|
|
10026
10336
|
});
|
|
10027
10337
|
logger.info("Vector store ready", { host: qdrantHost, port: qdrantPort, dimensions });
|
|
10028
|
-
|
|
10338
|
+
registerVectorIndexSizeProvider(() => vectorStore.count());
|
|
10029
10339
|
const httpTransport = new HttpTransport({
|
|
10030
10340
|
baseUrl: baseUrl$1(baseUrl),
|
|
10031
|
-
token$: tokenSubject
|
|
10341
|
+
token$: tokenSubject,
|
|
10342
|
+
tokenRefresher: refreshToken
|
|
10032
10343
|
});
|
|
10033
10344
|
const actorStateUnit = createSmelterActorStateUnit({
|
|
10034
10345
|
bus: httpTransport.actor
|
|
10035
10346
|
});
|
|
10036
|
-
const
|
|
10037
|
-
|
|
10038
|
-
|
|
10039
|
-
|
|
10040
|
-
|
|
10041
|
-
|
|
10042
|
-
|
|
10043
|
-
|
|
10044
|
-
|
|
10045
|
-
|
|
10046
|
-
|
|
10047
|
-
|
|
10048
|
-
|
|
10049
|
-
}
|
|
10050
|
-
return (0, import_rxjs2.from)(processEvent(eventOrBatch));
|
|
10051
|
-
})
|
|
10052
|
-
)
|
|
10053
|
-
)
|
|
10054
|
-
).subscribe({
|
|
10055
|
-
error: (err) => logger.error("Pipeline error", { error: err instanceof Error ? err.message : String(err) })
|
|
10056
|
-
});
|
|
10057
|
-
actorStateUnit.events$.subscribe((event) => {
|
|
10058
|
-
logger.debug("Bus event received", { type: event.type, resourceId: event.resourceId });
|
|
10059
|
-
eventSubject.next(event);
|
|
10060
|
-
});
|
|
10347
|
+
const contentTransport = new HttpContentTransport(httpTransport);
|
|
10348
|
+
logger.info("Content transport ready", { mode: "http" });
|
|
10349
|
+
const smelter = new Smelter(
|
|
10350
|
+
actorStateUnit.events$,
|
|
10351
|
+
vectorStore,
|
|
10352
|
+
embeddingProvider,
|
|
10353
|
+
contentTransport,
|
|
10354
|
+
httpTransport,
|
|
10355
|
+
chunkingConfig,
|
|
10356
|
+
{ burstWindowMs: 50, maxBatchSize: 100, idleTimeoutMs: 200 },
|
|
10357
|
+
logger
|
|
10358
|
+
);
|
|
10359
|
+
smelter.initialize();
|
|
10061
10360
|
actorStateUnit.start();
|
|
10062
10361
|
logger.info("Subscribed to domain events");
|
|
10063
10362
|
const health = createServer((req, res) => {
|
|
10064
10363
|
if (req.url === "/health") {
|
|
10065
10364
|
res.writeHead(200, { "Content-Type": "application/json" });
|
|
10066
|
-
res.end(JSON.stringify({
|
|
10365
|
+
res.end(JSON.stringify({
|
|
10366
|
+
status: "ok",
|
|
10367
|
+
eventsProcessed: smelter.eventsProcessed,
|
|
10368
|
+
reconcile: smelter.reconcileState
|
|
10369
|
+
}));
|
|
10067
10370
|
} else {
|
|
10068
10371
|
res.writeHead(404);
|
|
10069
10372
|
res.end();
|
|
@@ -10074,15 +10377,16 @@ async function main() {
|
|
|
10074
10377
|
});
|
|
10075
10378
|
const shutdown = () => {
|
|
10076
10379
|
logger.info("Shutting down");
|
|
10380
|
+
clearInterval(reauthTimer);
|
|
10077
10381
|
actorStateUnit.dispose();
|
|
10078
10382
|
httpTransport.dispose();
|
|
10079
|
-
|
|
10080
|
-
eventSubject.complete();
|
|
10383
|
+
smelter.stop();
|
|
10081
10384
|
health.close();
|
|
10082
10385
|
process.exit(0);
|
|
10083
10386
|
};
|
|
10084
10387
|
process.on("SIGTERM", shutdown);
|
|
10085
10388
|
process.on("SIGINT", shutdown);
|
|
10389
|
+
await smelter.reconcile();
|
|
10086
10390
|
}
|
|
10087
10391
|
main().catch((error) => {
|
|
10088
10392
|
logger.error("Fatal", { error: error instanceof Error ? error.message : String(error), stack: error instanceof Error ? error.stack : void 0 });
|