goldenmatch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. package/README.md +140 -0
  2. package/dist/cli.cjs +6079 -0
  3. package/dist/cli.cjs.map +1 -0
  4. package/dist/cli.d.cts +1 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.js +6076 -0
  7. package/dist/cli.js.map +1 -0
  8. package/dist/core/index.cjs +8449 -0
  9. package/dist/core/index.cjs.map +1 -0
  10. package/dist/core/index.d.cts +1972 -0
  11. package/dist/core/index.d.ts +1972 -0
  12. package/dist/core/index.js +8318 -0
  13. package/dist/core/index.js.map +1 -0
  14. package/dist/index.cjs +8449 -0
  15. package/dist/index.cjs.map +1 -0
  16. package/dist/index.d.cts +2 -0
  17. package/dist/index.d.ts +2 -0
  18. package/dist/index.js +8318 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/node/backends/score-worker.cjs +934 -0
  21. package/dist/node/backends/score-worker.cjs.map +1 -0
  22. package/dist/node/backends/score-worker.d.cts +14 -0
  23. package/dist/node/backends/score-worker.d.ts +14 -0
  24. package/dist/node/backends/score-worker.js +932 -0
  25. package/dist/node/backends/score-worker.js.map +1 -0
  26. package/dist/node/index.cjs +11430 -0
  27. package/dist/node/index.cjs.map +1 -0
  28. package/dist/node/index.d.cts +554 -0
  29. package/dist/node/index.d.ts +554 -0
  30. package/dist/node/index.js +11277 -0
  31. package/dist/node/index.js.map +1 -0
  32. package/dist/types-DhUdX5Rc.d.cts +304 -0
  33. package/dist/types-DhUdX5Rc.d.ts +304 -0
  34. package/examples/01-basic-dedupe.ts +60 -0
  35. package/examples/02-match-two-datasets.ts +48 -0
  36. package/examples/03-csv-file-pipeline.ts +62 -0
  37. package/examples/04-string-scoring.ts +63 -0
  38. package/examples/05-custom-config.ts +94 -0
  39. package/examples/06-probabilistic-fs.ts +72 -0
  40. package/examples/07-pprl-privacy.ts +76 -0
  41. package/examples/08-streaming.ts +79 -0
  42. package/examples/09-llm-scorer.ts +79 -0
  43. package/examples/10-explain.ts +60 -0
  44. package/examples/11-evaluate.ts +61 -0
  45. package/examples/README.md +53 -0
  46. package/package.json +66 -0
  47. package/src/cli.ts +372 -0
  48. package/src/core/ann-blocker.ts +593 -0
  49. package/src/core/api.ts +220 -0
  50. package/src/core/autoconfig.ts +363 -0
  51. package/src/core/autofix.ts +102 -0
  52. package/src/core/blocker.ts +655 -0
  53. package/src/core/cluster.ts +699 -0
  54. package/src/core/compare-clusters.ts +176 -0
  55. package/src/core/config/loader.ts +869 -0
  56. package/src/core/cross-encoder.ts +614 -0
  57. package/src/core/data.ts +430 -0
  58. package/src/core/domain.ts +277 -0
  59. package/src/core/embedder.ts +562 -0
  60. package/src/core/evaluate.ts +156 -0
  61. package/src/core/explain.ts +352 -0
  62. package/src/core/golden.ts +524 -0
  63. package/src/core/graph-er.ts +371 -0
  64. package/src/core/index.ts +314 -0
  65. package/src/core/ingest.ts +112 -0
  66. package/src/core/learned-blocking.ts +305 -0
  67. package/src/core/lineage.ts +221 -0
  68. package/src/core/llm/budget.ts +258 -0
  69. package/src/core/llm/cluster.ts +542 -0
  70. package/src/core/llm/scorer.ts +396 -0
  71. package/src/core/match-one.ts +95 -0
  72. package/src/core/matchkey.ts +97 -0
  73. package/src/core/memory/corrections.ts +179 -0
  74. package/src/core/memory/learner.ts +218 -0
  75. package/src/core/memory/store.ts +114 -0
  76. package/src/core/pipeline.ts +366 -0
  77. package/src/core/pprl/protocol.ts +216 -0
  78. package/src/core/probabilistic.ts +511 -0
  79. package/src/core/profiler.ts +212 -0
  80. package/src/core/quality.ts +197 -0
  81. package/src/core/review-queue.ts +177 -0
  82. package/src/core/scorer.ts +855 -0
  83. package/src/core/sensitivity.ts +196 -0
  84. package/src/core/standardize.ts +279 -0
  85. package/src/core/streaming.ts +128 -0
  86. package/src/core/transforms.ts +599 -0
  87. package/src/core/types.ts +570 -0
  88. package/src/core/validate.ts +243 -0
  89. package/src/index.ts +8 -0
  90. package/src/node/a2a/server.ts +470 -0
  91. package/src/node/api/server.ts +412 -0
  92. package/src/node/backends/duckdb.ts +130 -0
  93. package/src/node/backends/score-worker.ts +41 -0
  94. package/src/node/backends/workers.ts +212 -0
  95. package/src/node/config-file.ts +66 -0
  96. package/src/node/connectors/base.ts +57 -0
  97. package/src/node/connectors/bigquery.ts +61 -0
  98. package/src/node/connectors/databricks.ts +69 -0
  99. package/src/node/connectors/file.ts +350 -0
  100. package/src/node/connectors/hubspot.ts +62 -0
  101. package/src/node/connectors/index.ts +43 -0
  102. package/src/node/connectors/salesforce.ts +93 -0
  103. package/src/node/connectors/snowflake.ts +73 -0
  104. package/src/node/db/postgres.ts +173 -0
  105. package/src/node/db/sync.ts +103 -0
  106. package/src/node/dedupe-file.ts +156 -0
  107. package/src/node/index.ts +89 -0
  108. package/src/node/mcp/server.ts +940 -0
  109. package/src/node/tui/app.ts +756 -0
  110. package/src/node/tui/index.ts +6 -0
  111. package/src/node/tui/widgets.ts +128 -0
  112. package/tests/parity/scorer-ground-truth.test.ts +118 -0
  113. package/tests/smoke.test.ts +46 -0
  114. package/tests/unit/a2a-server.test.ts +175 -0
  115. package/tests/unit/ann-blocker.test.ts +117 -0
  116. package/tests/unit/api-server.test.ts +239 -0
  117. package/tests/unit/api.test.ts +77 -0
  118. package/tests/unit/autoconfig.test.ts +103 -0
  119. package/tests/unit/autofix.test.ts +71 -0
  120. package/tests/unit/blocker.test.ts +164 -0
  121. package/tests/unit/buildBlocksAsync.test.ts +63 -0
  122. package/tests/unit/cluster.test.ts +213 -0
  123. package/tests/unit/compare-clusters.test.ts +42 -0
  124. package/tests/unit/config-loader.test.ts +301 -0
  125. package/tests/unit/connectors-base.test.ts +48 -0
  126. package/tests/unit/cross-encoder-model.test.ts +198 -0
  127. package/tests/unit/cross-encoder.test.ts +173 -0
  128. package/tests/unit/db-connectors.test.ts +37 -0
  129. package/tests/unit/domain.test.ts +80 -0
  130. package/tests/unit/embedder.test.ts +151 -0
  131. package/tests/unit/evaluate.test.ts +85 -0
  132. package/tests/unit/explain.test.ts +73 -0
  133. package/tests/unit/golden.test.ts +97 -0
  134. package/tests/unit/graph-er.test.ts +173 -0
  135. package/tests/unit/hnsw-ann.test.ts +283 -0
  136. package/tests/unit/hubspot-connector.test.ts +118 -0
  137. package/tests/unit/ingest.test.ts +97 -0
  138. package/tests/unit/learned-blocking.test.ts +134 -0
  139. package/tests/unit/lineage.test.ts +135 -0
  140. package/tests/unit/match-one.test.ts +129 -0
  141. package/tests/unit/matchkey.test.ts +97 -0
  142. package/tests/unit/mcp-server.test.ts +183 -0
  143. package/tests/unit/memory.test.ts +119 -0
  144. package/tests/unit/pipeline.test.ts +118 -0
  145. package/tests/unit/pprl-protocol.test.ts +381 -0
  146. package/tests/unit/probabilistic.test.ts +494 -0
  147. package/tests/unit/profiler.test.ts +68 -0
  148. package/tests/unit/review-queue.test.ts +68 -0
  149. package/tests/unit/salesforce-connector.test.ts +148 -0
  150. package/tests/unit/scorer.test.ts +301 -0
  151. package/tests/unit/sensitivity.test.ts +154 -0
  152. package/tests/unit/standardize.test.ts +84 -0
  153. package/tests/unit/streaming.test.ts +82 -0
  154. package/tests/unit/transforms.test.ts +208 -0
  155. package/tests/unit/tui-widgets.test.ts +42 -0
  156. package/tests/unit/tui.test.ts +24 -0
  157. package/tests/unit/validate.test.ts +145 -0
  158. package/tests/unit/workers-parallel.test.ts +99 -0
  159. package/tests/unit/workers.test.ts +74 -0
  160. package/tsconfig.json +25 -0
  161. package/tsup.config.ts +37 -0
  162. package/vitest.config.ts +11 -0
@@ -0,0 +1,2 @@
1
+ export { B as BlockResult, b as BlockingConfig, c as BlockingKeyConfig, d as BudgetConfig, C as CanopyConfig, e as ClusterInfo, f as ClusterProvenance, g as ColumnValue, D as DedupeResult, h as DedupeStats, i as DomainConfig, E as ExactMatchkey, F as FieldProvenance, j as GoldenFieldRule, G as GoldenMatchConfig, k as GoldenRulesConfig, I as InputConfig, l as InputFileConfig, L as LLMScorerConfig, m as LearningConfig, n as MakeMatchkeyConfigInput, a as MatchResult, M as MatchkeyConfig, o as MatchkeyField, p as MemoryConfig, O as OutputConfig, P as PairKey, q as ProbabilisticMatchkey, Q as QualityConfig, R as Row, S as ScoredPair, r as SortKeyField, s as StandardizationConfig, T as TransformConfig, V as VALID_SCORERS, t as VALID_STANDARDIZERS, u as VALID_STRATEGIES, v as VALID_TRANSFORMS, w as ValidationConfig, x as ValidationRuleConfig, W as WeightedMatchkey, y as getMatchkeys, z as makeBlockingConfig, A as makeConfig, H as makeGoldenRulesConfig, J as makeMatchkeyConfig, K as makeMatchkeyField, N as makeScoredPair } from './types-DhUdX5Rc.cjs';
2
+ export { ANNBlocker, ANNBlockerBase, ANNBlockerOptions, AutoFixLog, AutoconfigOptions, BudgetSnapshot, BudgetTracker, BuildANNOptions, CCMSResult, ClusterExplanation, ColumnProfile, Correction, CreateANNBlockerOptions, CrossEncoderHttpError, CrossEncoderModel, CrossEncoderModelOptions, CrossEncoderOptions, CrossEncoderProvider, CrossEncoderReranker, DatasetProfile, DomainProfile, EMResult, Embedder, EmbedderError, EmbedderOptions, EmbedderProvider, EmbeddingResult, EvalResult, GatedResult, GraphERResult, HNSWANNBlocker, HNSWIndexLike, HNSWModule, HNSWOptions, LLMScoreResult, LearnedParams, LearnedPredicate, LearnedRules, LineageBundle, LineageEdge, MemoryLearner, MemoryStore, MemoryStoreConfig, PPRLConfig, PPRLResult, PairExplanation, QualityFinding, Relationship, ReviewItem, ReviewQueue, SensitivityResult, StreamProcessor, SweepParam, SweepPoint, TableSchema, TabularData, UnionFind, ValidationReport, ValidationRule, _resetCrossEncoderModelCache, addRowIds, addSourceColumn, addToCluster, applyColumnMap, applyCorrections, applyLearnedBlocks, applyStandardization, applyStandardizer, applyTransform, applyTransforms, asString, autoConfigurePPRL, autoConfigureRows, autoFixRows, buildANNBlocks, buildANNPairBlocks, buildAdaptiveBlocks, buildBlocks, buildBlocksAsync, buildClusters, buildComparisonVector, buildGoldenRecord, buildGoldenRecordWithProvenance, buildLineage, buildMst, buildMultiPassBlocks, buildStaticBlocks, compareClusters, computeClusterConfidence, computeMatchkeyValue, computeMatchkeys, concatRows, configToYaml, cosineSim, countTokensApprox, createANNBlocker, dedupe, detectDomain, diceCoefficient, ensembleScore, euclideanDist, evaluateClusters, evaluatePairs, explainCluster, explainPair, extractFeatures, findExactMatches, findExactMatchesOne, findFuzzyMatches, gatePairs, getClusterPairScores, getEmbedder, hashRow, indelDistance, indelSimilarity, isNullish, jaccardSimilarity, jaro, jaroWinkler, learnBlockingRules, levenshteinDistance, levenshteinSimilarity, 
lineageFromJson, lineageToJson, llmClusterPairs, llmScorePairs, loadGroundTruthPairs, match, matchOne, mergeField, metaphone, pairKey, parseConfig, parseConfigYaml, parsePairKey, profileRows, rerankPair, rerankTopPairs, runDedupePipeline, runGraphER, runMatchPipeline, runPPRL, runQualityCheck, runSensitivity, scanQuality, scoreBlocksSequential, scoreField, scoreMatrix, scorePair, scorePairRecord, scoreProbabilistic, scoreStrings, scoreStringsWithLlm, selectBestBlockingKey, soundex, soundexMatch, splitOversizedCluster, stabilityReport, toColumnValue, tokenSortRatio, trainEM, unmergeCluster, unmergeRecord, validateColumns, validateRows } from './core/index.cjs';
@@ -0,0 +1,2 @@
1
+ export { B as BlockResult, b as BlockingConfig, c as BlockingKeyConfig, d as BudgetConfig, C as CanopyConfig, e as ClusterInfo, f as ClusterProvenance, g as ColumnValue, D as DedupeResult, h as DedupeStats, i as DomainConfig, E as ExactMatchkey, F as FieldProvenance, j as GoldenFieldRule, G as GoldenMatchConfig, k as GoldenRulesConfig, I as InputConfig, l as InputFileConfig, L as LLMScorerConfig, m as LearningConfig, n as MakeMatchkeyConfigInput, a as MatchResult, M as MatchkeyConfig, o as MatchkeyField, p as MemoryConfig, O as OutputConfig, P as PairKey, q as ProbabilisticMatchkey, Q as QualityConfig, R as Row, S as ScoredPair, r as SortKeyField, s as StandardizationConfig, T as TransformConfig, V as VALID_SCORERS, t as VALID_STANDARDIZERS, u as VALID_STRATEGIES, v as VALID_TRANSFORMS, w as ValidationConfig, x as ValidationRuleConfig, W as WeightedMatchkey, y as getMatchkeys, z as makeBlockingConfig, A as makeConfig, H as makeGoldenRulesConfig, J as makeMatchkeyConfig, K as makeMatchkeyField, N as makeScoredPair } from './types-DhUdX5Rc.js';
2
+ export { ANNBlocker, ANNBlockerBase, ANNBlockerOptions, AutoFixLog, AutoconfigOptions, BudgetSnapshot, BudgetTracker, BuildANNOptions, CCMSResult, ClusterExplanation, ColumnProfile, Correction, CreateANNBlockerOptions, CrossEncoderHttpError, CrossEncoderModel, CrossEncoderModelOptions, CrossEncoderOptions, CrossEncoderProvider, CrossEncoderReranker, DatasetProfile, DomainProfile, EMResult, Embedder, EmbedderError, EmbedderOptions, EmbedderProvider, EmbeddingResult, EvalResult, GatedResult, GraphERResult, HNSWANNBlocker, HNSWIndexLike, HNSWModule, HNSWOptions, LLMScoreResult, LearnedParams, LearnedPredicate, LearnedRules, LineageBundle, LineageEdge, MemoryLearner, MemoryStore, MemoryStoreConfig, PPRLConfig, PPRLResult, PairExplanation, QualityFinding, Relationship, ReviewItem, ReviewQueue, SensitivityResult, StreamProcessor, SweepParam, SweepPoint, TableSchema, TabularData, UnionFind, ValidationReport, ValidationRule, _resetCrossEncoderModelCache, addRowIds, addSourceColumn, addToCluster, applyColumnMap, applyCorrections, applyLearnedBlocks, applyStandardization, applyStandardizer, applyTransform, applyTransforms, asString, autoConfigurePPRL, autoConfigureRows, autoFixRows, buildANNBlocks, buildANNPairBlocks, buildAdaptiveBlocks, buildBlocks, buildBlocksAsync, buildClusters, buildComparisonVector, buildGoldenRecord, buildGoldenRecordWithProvenance, buildLineage, buildMst, buildMultiPassBlocks, buildStaticBlocks, compareClusters, computeClusterConfidence, computeMatchkeyValue, computeMatchkeys, concatRows, configToYaml, cosineSim, countTokensApprox, createANNBlocker, dedupe, detectDomain, diceCoefficient, ensembleScore, euclideanDist, evaluateClusters, evaluatePairs, explainCluster, explainPair, extractFeatures, findExactMatches, findExactMatchesOne, findFuzzyMatches, gatePairs, getClusterPairScores, getEmbedder, hashRow, indelDistance, indelSimilarity, isNullish, jaccardSimilarity, jaro, jaroWinkler, learnBlockingRules, levenshteinDistance, levenshteinSimilarity, 
lineageFromJson, lineageToJson, llmClusterPairs, llmScorePairs, loadGroundTruthPairs, match, matchOne, mergeField, metaphone, pairKey, parseConfig, parseConfigYaml, parsePairKey, profileRows, rerankPair, rerankTopPairs, runDedupePipeline, runGraphER, runMatchPipeline, runPPRL, runQualityCheck, runSensitivity, scanQuality, scoreBlocksSequential, scoreField, scoreMatrix, scorePair, scorePairRecord, scoreProbabilistic, scoreStrings, scoreStringsWithLlm, selectBestBlockingKey, soundex, soundexMatch, splitOversizedCluster, stabilityReport, toColumnValue, tokenSortRatio, trainEM, unmergeCluster, unmergeRecord, validateColumns, validateRows } from './core/index.js';