goldenmatch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -0
- package/dist/cli.cjs +6079 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +6076 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/index.cjs +8449 -0
- package/dist/core/index.cjs.map +1 -0
- package/dist/core/index.d.cts +1972 -0
- package/dist/core/index.d.ts +1972 -0
- package/dist/core/index.js +8318 -0
- package/dist/core/index.js.map +1 -0
- package/dist/index.cjs +8449 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +8318 -0
- package/dist/index.js.map +1 -0
- package/dist/node/backends/score-worker.cjs +934 -0
- package/dist/node/backends/score-worker.cjs.map +1 -0
- package/dist/node/backends/score-worker.d.cts +14 -0
- package/dist/node/backends/score-worker.d.ts +14 -0
- package/dist/node/backends/score-worker.js +932 -0
- package/dist/node/backends/score-worker.js.map +1 -0
- package/dist/node/index.cjs +11430 -0
- package/dist/node/index.cjs.map +1 -0
- package/dist/node/index.d.cts +554 -0
- package/dist/node/index.d.ts +554 -0
- package/dist/node/index.js +11277 -0
- package/dist/node/index.js.map +1 -0
- package/dist/types-DhUdX5Rc.d.cts +304 -0
- package/dist/types-DhUdX5Rc.d.ts +304 -0
- package/examples/01-basic-dedupe.ts +60 -0
- package/examples/02-match-two-datasets.ts +48 -0
- package/examples/03-csv-file-pipeline.ts +62 -0
- package/examples/04-string-scoring.ts +63 -0
- package/examples/05-custom-config.ts +94 -0
- package/examples/06-probabilistic-fs.ts +72 -0
- package/examples/07-pprl-privacy.ts +76 -0
- package/examples/08-streaming.ts +79 -0
- package/examples/09-llm-scorer.ts +79 -0
- package/examples/10-explain.ts +60 -0
- package/examples/11-evaluate.ts +61 -0
- package/examples/README.md +53 -0
- package/package.json +66 -0
- package/src/cli.ts +372 -0
- package/src/core/ann-blocker.ts +593 -0
- package/src/core/api.ts +220 -0
- package/src/core/autoconfig.ts +363 -0
- package/src/core/autofix.ts +102 -0
- package/src/core/blocker.ts +655 -0
- package/src/core/cluster.ts +699 -0
- package/src/core/compare-clusters.ts +176 -0
- package/src/core/config/loader.ts +869 -0
- package/src/core/cross-encoder.ts +614 -0
- package/src/core/data.ts +430 -0
- package/src/core/domain.ts +277 -0
- package/src/core/embedder.ts +562 -0
- package/src/core/evaluate.ts +156 -0
- package/src/core/explain.ts +352 -0
- package/src/core/golden.ts +524 -0
- package/src/core/graph-er.ts +371 -0
- package/src/core/index.ts +314 -0
- package/src/core/ingest.ts +112 -0
- package/src/core/learned-blocking.ts +305 -0
- package/src/core/lineage.ts +221 -0
- package/src/core/llm/budget.ts +258 -0
- package/src/core/llm/cluster.ts +542 -0
- package/src/core/llm/scorer.ts +396 -0
- package/src/core/match-one.ts +95 -0
- package/src/core/matchkey.ts +97 -0
- package/src/core/memory/corrections.ts +179 -0
- package/src/core/memory/learner.ts +218 -0
- package/src/core/memory/store.ts +114 -0
- package/src/core/pipeline.ts +366 -0
- package/src/core/pprl/protocol.ts +216 -0
- package/src/core/probabilistic.ts +511 -0
- package/src/core/profiler.ts +212 -0
- package/src/core/quality.ts +197 -0
- package/src/core/review-queue.ts +177 -0
- package/src/core/scorer.ts +855 -0
- package/src/core/sensitivity.ts +196 -0
- package/src/core/standardize.ts +279 -0
- package/src/core/streaming.ts +128 -0
- package/src/core/transforms.ts +599 -0
- package/src/core/types.ts +570 -0
- package/src/core/validate.ts +243 -0
- package/src/index.ts +8 -0
- package/src/node/a2a/server.ts +470 -0
- package/src/node/api/server.ts +412 -0
- package/src/node/backends/duckdb.ts +130 -0
- package/src/node/backends/score-worker.ts +41 -0
- package/src/node/backends/workers.ts +212 -0
- package/src/node/config-file.ts +66 -0
- package/src/node/connectors/base.ts +57 -0
- package/src/node/connectors/bigquery.ts +61 -0
- package/src/node/connectors/databricks.ts +69 -0
- package/src/node/connectors/file.ts +350 -0
- package/src/node/connectors/hubspot.ts +62 -0
- package/src/node/connectors/index.ts +43 -0
- package/src/node/connectors/salesforce.ts +93 -0
- package/src/node/connectors/snowflake.ts +73 -0
- package/src/node/db/postgres.ts +173 -0
- package/src/node/db/sync.ts +103 -0
- package/src/node/dedupe-file.ts +156 -0
- package/src/node/index.ts +89 -0
- package/src/node/mcp/server.ts +940 -0
- package/src/node/tui/app.ts +756 -0
- package/src/node/tui/index.ts +6 -0
- package/src/node/tui/widgets.ts +128 -0
- package/tests/parity/scorer-ground-truth.test.ts +118 -0
- package/tests/smoke.test.ts +46 -0
- package/tests/unit/a2a-server.test.ts +175 -0
- package/tests/unit/ann-blocker.test.ts +117 -0
- package/tests/unit/api-server.test.ts +239 -0
- package/tests/unit/api.test.ts +77 -0
- package/tests/unit/autoconfig.test.ts +103 -0
- package/tests/unit/autofix.test.ts +71 -0
- package/tests/unit/blocker.test.ts +164 -0
- package/tests/unit/buildBlocksAsync.test.ts +63 -0
- package/tests/unit/cluster.test.ts +213 -0
- package/tests/unit/compare-clusters.test.ts +42 -0
- package/tests/unit/config-loader.test.ts +301 -0
- package/tests/unit/connectors-base.test.ts +48 -0
- package/tests/unit/cross-encoder-model.test.ts +198 -0
- package/tests/unit/cross-encoder.test.ts +173 -0
- package/tests/unit/db-connectors.test.ts +37 -0
- package/tests/unit/domain.test.ts +80 -0
- package/tests/unit/embedder.test.ts +151 -0
- package/tests/unit/evaluate.test.ts +85 -0
- package/tests/unit/explain.test.ts +73 -0
- package/tests/unit/golden.test.ts +97 -0
- package/tests/unit/graph-er.test.ts +173 -0
- package/tests/unit/hnsw-ann.test.ts +283 -0
- package/tests/unit/hubspot-connector.test.ts +118 -0
- package/tests/unit/ingest.test.ts +97 -0
- package/tests/unit/learned-blocking.test.ts +134 -0
- package/tests/unit/lineage.test.ts +135 -0
- package/tests/unit/match-one.test.ts +129 -0
- package/tests/unit/matchkey.test.ts +97 -0
- package/tests/unit/mcp-server.test.ts +183 -0
- package/tests/unit/memory.test.ts +119 -0
- package/tests/unit/pipeline.test.ts +118 -0
- package/tests/unit/pprl-protocol.test.ts +381 -0
- package/tests/unit/probabilistic.test.ts +494 -0
- package/tests/unit/profiler.test.ts +68 -0
- package/tests/unit/review-queue.test.ts +68 -0
- package/tests/unit/salesforce-connector.test.ts +148 -0
- package/tests/unit/scorer.test.ts +301 -0
- package/tests/unit/sensitivity.test.ts +154 -0
- package/tests/unit/standardize.test.ts +84 -0
- package/tests/unit/streaming.test.ts +82 -0
- package/tests/unit/transforms.test.ts +208 -0
- package/tests/unit/tui-widgets.test.ts +42 -0
- package/tests/unit/tui.test.ts +24 -0
- package/tests/unit/validate.test.ts +145 -0
- package/tests/unit/workers-parallel.test.ts +99 -0
- package/tests/unit/workers.test.ts +74 -0
- package/tsconfig.json +25 -0
- package/tsup.config.ts +37 -0
- package/vitest.config.ts +11 -0
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export { B as BlockResult, b as BlockingConfig, c as BlockingKeyConfig, d as BudgetConfig, C as CanopyConfig, e as ClusterInfo, f as ClusterProvenance, g as ColumnValue, D as DedupeResult, h as DedupeStats, i as DomainConfig, E as ExactMatchkey, F as FieldProvenance, j as GoldenFieldRule, G as GoldenMatchConfig, k as GoldenRulesConfig, I as InputConfig, l as InputFileConfig, L as LLMScorerConfig, m as LearningConfig, n as MakeMatchkeyConfigInput, a as MatchResult, M as MatchkeyConfig, o as MatchkeyField, p as MemoryConfig, O as OutputConfig, P as PairKey, q as ProbabilisticMatchkey, Q as QualityConfig, R as Row, S as ScoredPair, r as SortKeyField, s as StandardizationConfig, T as TransformConfig, V as VALID_SCORERS, t as VALID_STANDARDIZERS, u as VALID_STRATEGIES, v as VALID_TRANSFORMS, w as ValidationConfig, x as ValidationRuleConfig, W as WeightedMatchkey, y as getMatchkeys, z as makeBlockingConfig, A as makeConfig, H as makeGoldenRulesConfig, J as makeMatchkeyConfig, K as makeMatchkeyField, N as makeScoredPair } from './types-DhUdX5Rc.cjs';
|
|
2
|
+
export { ANNBlocker, ANNBlockerBase, ANNBlockerOptions, AutoFixLog, AutoconfigOptions, BudgetSnapshot, BudgetTracker, BuildANNOptions, CCMSResult, ClusterExplanation, ColumnProfile, Correction, CreateANNBlockerOptions, CrossEncoderHttpError, CrossEncoderModel, CrossEncoderModelOptions, CrossEncoderOptions, CrossEncoderProvider, CrossEncoderReranker, DatasetProfile, DomainProfile, EMResult, Embedder, EmbedderError, EmbedderOptions, EmbedderProvider, EmbeddingResult, EvalResult, GatedResult, GraphERResult, HNSWANNBlocker, HNSWIndexLike, HNSWModule, HNSWOptions, LLMScoreResult, LearnedParams, LearnedPredicate, LearnedRules, LineageBundle, LineageEdge, MemoryLearner, MemoryStore, MemoryStoreConfig, PPRLConfig, PPRLResult, PairExplanation, QualityFinding, Relationship, ReviewItem, ReviewQueue, SensitivityResult, StreamProcessor, SweepParam, SweepPoint, TableSchema, TabularData, UnionFind, ValidationReport, ValidationRule, _resetCrossEncoderModelCache, addRowIds, addSourceColumn, addToCluster, applyColumnMap, applyCorrections, applyLearnedBlocks, applyStandardization, applyStandardizer, applyTransform, applyTransforms, asString, autoConfigurePPRL, autoConfigureRows, autoFixRows, buildANNBlocks, buildANNPairBlocks, buildAdaptiveBlocks, buildBlocks, buildBlocksAsync, buildClusters, buildComparisonVector, buildGoldenRecord, buildGoldenRecordWithProvenance, buildLineage, buildMst, buildMultiPassBlocks, buildStaticBlocks, compareClusters, computeClusterConfidence, computeMatchkeyValue, computeMatchkeys, concatRows, configToYaml, cosineSim, countTokensApprox, createANNBlocker, dedupe, detectDomain, diceCoefficient, ensembleScore, euclideanDist, evaluateClusters, evaluatePairs, explainCluster, explainPair, extractFeatures, findExactMatches, findExactMatchesOne, findFuzzyMatches, gatePairs, getClusterPairScores, getEmbedder, hashRow, indelDistance, indelSimilarity, isNullish, jaccardSimilarity, jaro, jaroWinkler, learnBlockingRules, levenshteinDistance, levenshteinSimilarity, lineageFromJson, lineageToJson, llmClusterPairs, llmScorePairs, loadGroundTruthPairs, match, matchOne, mergeField, metaphone, pairKey, parseConfig, parseConfigYaml, parsePairKey, profileRows, rerankPair, rerankTopPairs, runDedupePipeline, runGraphER, runMatchPipeline, runPPRL, runQualityCheck, runSensitivity, scanQuality, scoreBlocksSequential, scoreField, scoreMatrix, scorePair, scorePairRecord, scoreProbabilistic, scoreStrings, scoreStringsWithLlm, selectBestBlockingKey, soundex, soundexMatch, splitOversizedCluster, stabilityReport, toColumnValue, tokenSortRatio, trainEM, unmergeCluster, unmergeRecord, validateColumns, validateRows } from './core/index.cjs';
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
1
|
+
export { B as BlockResult, b as BlockingConfig, c as BlockingKeyConfig, d as BudgetConfig, C as CanopyConfig, e as ClusterInfo, f as ClusterProvenance, g as ColumnValue, D as DedupeResult, h as DedupeStats, i as DomainConfig, E as ExactMatchkey, F as FieldProvenance, j as GoldenFieldRule, G as GoldenMatchConfig, k as GoldenRulesConfig, I as InputConfig, l as InputFileConfig, L as LLMScorerConfig, m as LearningConfig, n as MakeMatchkeyConfigInput, a as MatchResult, M as MatchkeyConfig, o as MatchkeyField, p as MemoryConfig, O as OutputConfig, P as PairKey, q as ProbabilisticMatchkey, Q as QualityConfig, R as Row, S as ScoredPair, r as SortKeyField, s as StandardizationConfig, T as TransformConfig, V as VALID_SCORERS, t as VALID_STANDARDIZERS, u as VALID_STRATEGIES, v as VALID_TRANSFORMS, w as ValidationConfig, x as ValidationRuleConfig, W as WeightedMatchkey, y as getMatchkeys, z as makeBlockingConfig, A as makeConfig, H as makeGoldenRulesConfig, J as makeMatchkeyConfig, K as makeMatchkeyField, N as makeScoredPair } from './types-DhUdX5Rc.js';
|
|
2
|
+
export { ANNBlocker, ANNBlockerBase, ANNBlockerOptions, AutoFixLog, AutoconfigOptions, BudgetSnapshot, BudgetTracker, BuildANNOptions, CCMSResult, ClusterExplanation, ColumnProfile, Correction, CreateANNBlockerOptions, CrossEncoderHttpError, CrossEncoderModel, CrossEncoderModelOptions, CrossEncoderOptions, CrossEncoderProvider, CrossEncoderReranker, DatasetProfile, DomainProfile, EMResult, Embedder, EmbedderError, EmbedderOptions, EmbedderProvider, EmbeddingResult, EvalResult, GatedResult, GraphERResult, HNSWANNBlocker, HNSWIndexLike, HNSWModule, HNSWOptions, LLMScoreResult, LearnedParams, LearnedPredicate, LearnedRules, LineageBundle, LineageEdge, MemoryLearner, MemoryStore, MemoryStoreConfig, PPRLConfig, PPRLResult, PairExplanation, QualityFinding, Relationship, ReviewItem, ReviewQueue, SensitivityResult, StreamProcessor, SweepParam, SweepPoint, TableSchema, TabularData, UnionFind, ValidationReport, ValidationRule, _resetCrossEncoderModelCache, addRowIds, addSourceColumn, addToCluster, applyColumnMap, applyCorrections, applyLearnedBlocks, applyStandardization, applyStandardizer, applyTransform, applyTransforms, asString, autoConfigurePPRL, autoConfigureRows, autoFixRows, buildANNBlocks, buildANNPairBlocks, buildAdaptiveBlocks, buildBlocks, buildBlocksAsync, buildClusters, buildComparisonVector, buildGoldenRecord, buildGoldenRecordWithProvenance, buildLineage, buildMst, buildMultiPassBlocks, buildStaticBlocks, compareClusters, computeClusterConfidence, computeMatchkeyValue, computeMatchkeys, concatRows, configToYaml, cosineSim, countTokensApprox, createANNBlocker, dedupe, detectDomain, diceCoefficient, ensembleScore, euclideanDist, evaluateClusters, evaluatePairs, explainCluster, explainPair, extractFeatures, findExactMatches, findExactMatchesOne, findFuzzyMatches, gatePairs, getClusterPairScores, getEmbedder, hashRow, indelDistance, indelSimilarity, isNullish, jaccardSimilarity, jaro, jaroWinkler, learnBlockingRules, levenshteinDistance, levenshteinSimilarity, lineageFromJson, lineageToJson, llmClusterPairs, llmScorePairs, loadGroundTruthPairs, match, matchOne, mergeField, metaphone, pairKey, parseConfig, parseConfigYaml, parsePairKey, profileRows, rerankPair, rerankTopPairs, runDedupePipeline, runGraphER, runMatchPipeline, runPPRL, runQualityCheck, runSensitivity, scanQuality, scoreBlocksSequential, scoreField, scoreMatrix, scorePair, scorePairRecord, scoreProbabilistic, scoreStrings, scoreStringsWithLlm, selectBestBlockingKey, soundex, soundexMatch, splitOversizedCluster, stabilityReport, toColumnValue, tokenSortRatio, trainEM, unmergeCluster, unmergeRecord, validateColumns, validateRows } from './core/index.js';
|