@struktur/sdk 2.1.2 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200) hide show
  1. package/dist/artifacts/fileToArtifact.d.ts +8 -0
  2. package/dist/artifacts/fileToArtifact.d.ts.map +1 -0
  3. package/dist/artifacts/input.d.ts +60 -0
  4. package/dist/artifacts/input.d.ts.map +1 -0
  5. package/{src/artifacts/providers.ts → dist/artifacts/providers.d.ts} +2 -4
  6. package/dist/artifacts/providers.d.ts.map +1 -0
  7. package/dist/artifacts/urlToArtifact.d.ts +3 -0
  8. package/dist/artifacts/urlToArtifact.d.ts.map +1 -0
  9. package/dist/auth/config.d.ts +34 -0
  10. package/dist/auth/config.d.ts.map +1 -0
  11. package/dist/auth/tokens.d.ts +18 -0
  12. package/dist/auth/tokens.d.ts.map +1 -0
  13. package/dist/chunking/ArtifactBatcher.d.ts +11 -0
  14. package/dist/chunking/ArtifactBatcher.d.ts.map +1 -0
  15. package/dist/chunking/ArtifactSplitter.d.ts +10 -0
  16. package/dist/chunking/ArtifactSplitter.d.ts.map +1 -0
  17. package/dist/debug/logger.d.ts +169 -0
  18. package/dist/debug/logger.d.ts.map +1 -0
  19. package/dist/extract.d.ts +3 -0
  20. package/dist/extract.d.ts.map +1 -0
  21. package/dist/fields.d.ts +75 -0
  22. package/dist/fields.d.ts.map +1 -0
  23. package/dist/index.d.ts +24 -0
  24. package/dist/index.d.ts.map +1 -0
  25. package/dist/index.js +5603 -0
  26. package/dist/index.js.map +1 -0
  27. package/dist/llm/LLMClient.d.ts +40 -0
  28. package/dist/llm/LLMClient.d.ts.map +1 -0
  29. package/dist/llm/RetryingRunner.d.ts +37 -0
  30. package/dist/llm/RetryingRunner.d.ts.map +1 -0
  31. package/dist/llm/message.d.ts +12 -0
  32. package/dist/llm/message.d.ts.map +1 -0
  33. package/dist/llm/models.d.ts +13 -0
  34. package/dist/llm/models.d.ts.map +1 -0
  35. package/dist/llm/resolveModel.d.ts +3 -0
  36. package/dist/llm/resolveModel.d.ts.map +1 -0
  37. package/dist/merge/Deduplicator.d.ts +4 -0
  38. package/dist/merge/Deduplicator.d.ts.map +1 -0
  39. package/dist/merge/SmartDataMerger.d.ts +7 -0
  40. package/dist/merge/SmartDataMerger.d.ts.map +1 -0
  41. package/dist/parsers/collect.d.ts +7 -0
  42. package/dist/parsers/collect.d.ts.map +1 -0
  43. package/{src/parsers/index.ts → dist/parsers/index.d.ts} +1 -0
  44. package/dist/parsers/index.d.ts.map +1 -0
  45. package/dist/parsers/mime.d.ts +12 -0
  46. package/dist/parsers/mime.d.ts.map +1 -0
  47. package/dist/parsers/npm.d.ts +16 -0
  48. package/dist/parsers/npm.d.ts.map +1 -0
  49. package/dist/parsers/pdf.d.ts +36 -0
  50. package/dist/parsers/pdf.d.ts.map +1 -0
  51. package/dist/parsers/runner.d.ts +4 -0
  52. package/dist/parsers/runner.d.ts.map +1 -0
  53. package/dist/parsers/types.d.ts +27 -0
  54. package/dist/parsers/types.d.ts.map +1 -0
  55. package/dist/parsers.d.ts +1 -0
  56. package/dist/parsers.js +492 -0
  57. package/dist/parsers.js.map +1 -0
  58. package/dist/prompts/DeduplicationPrompt.d.ts +5 -0
  59. package/dist/prompts/DeduplicationPrompt.d.ts.map +1 -0
  60. package/dist/prompts/ExtractorPrompt.d.ts +6 -0
  61. package/dist/prompts/ExtractorPrompt.d.ts.map +1 -0
  62. package/dist/prompts/ParallelMergerPrompt.d.ts +5 -0
  63. package/dist/prompts/ParallelMergerPrompt.d.ts.map +1 -0
  64. package/dist/prompts/SequentialExtractorPrompt.d.ts +6 -0
  65. package/dist/prompts/SequentialExtractorPrompt.d.ts.map +1 -0
  66. package/dist/prompts/formatArtifacts.d.ts +3 -0
  67. package/dist/prompts/formatArtifacts.d.ts.map +1 -0
  68. package/dist/strategies/DoublePassAutoMergeStrategy.d.ts +23 -0
  69. package/dist/strategies/DoublePassAutoMergeStrategy.d.ts.map +1 -0
  70. package/dist/strategies/DoublePassStrategy.d.ts +22 -0
  71. package/dist/strategies/DoublePassStrategy.d.ts.map +1 -0
  72. package/dist/strategies/ParallelAutoMergeStrategy.d.ts +27 -0
  73. package/dist/strategies/ParallelAutoMergeStrategy.d.ts.map +1 -0
  74. package/dist/strategies/ParallelStrategy.d.ts +22 -0
  75. package/dist/strategies/ParallelStrategy.d.ts.map +1 -0
  76. package/dist/strategies/SequentialAutoMergeStrategy.d.ts +22 -0
  77. package/dist/strategies/SequentialAutoMergeStrategy.d.ts.map +1 -0
  78. package/dist/strategies/SequentialStrategy.d.ts +20 -0
  79. package/dist/strategies/SequentialStrategy.d.ts.map +1 -0
  80. package/dist/strategies/SimpleStrategy.d.ts +18 -0
  81. package/dist/strategies/SimpleStrategy.d.ts.map +1 -0
  82. package/dist/strategies/agent/AgentStrategy.d.ts +44 -0
  83. package/dist/strategies/agent/AgentStrategy.d.ts.map +1 -0
  84. package/dist/strategies/agent/AgentTools.d.ts +55 -0
  85. package/dist/strategies/agent/AgentTools.d.ts.map +1 -0
  86. package/dist/strategies/agent/ArtifactFilesystem.d.ts +51 -0
  87. package/dist/strategies/agent/ArtifactFilesystem.d.ts.map +1 -0
  88. package/dist/strategies/agent/index.d.ts +4 -0
  89. package/dist/strategies/agent/index.d.ts.map +1 -0
  90. package/dist/strategies/concurrency.d.ts +2 -0
  91. package/dist/strategies/concurrency.d.ts.map +1 -0
  92. package/{src/strategies/index.ts → dist/strategies/index.d.ts} +2 -0
  93. package/dist/strategies/index.d.ts.map +1 -0
  94. package/dist/strategies/utils.d.ts +39 -0
  95. package/dist/strategies/utils.d.ts.map +1 -0
  96. package/dist/strategies.d.ts +1 -0
  97. package/dist/strategies.js +3930 -0
  98. package/dist/strategies.js.map +1 -0
  99. package/dist/tokenization.d.ts +11 -0
  100. package/dist/tokenization.d.ts.map +1 -0
  101. package/dist/types.d.ts +178 -0
  102. package/dist/types.d.ts.map +1 -0
  103. package/dist/validation/validator.d.ts +20 -0
  104. package/dist/validation/validator.d.ts.map +1 -0
  105. package/package.json +30 -14
  106. package/src/agent-cli-integration.test.ts +0 -47
  107. package/src/agent-export.test.ts +0 -17
  108. package/src/agent-tool-labels.test.ts +0 -50
  109. package/src/artifacts/AGENTS.md +0 -16
  110. package/src/artifacts/fileToArtifact.test.ts +0 -37
  111. package/src/artifacts/fileToArtifact.ts +0 -44
  112. package/src/artifacts/input.test.ts +0 -243
  113. package/src/artifacts/input.ts +0 -360
  114. package/src/artifacts/providers.test.ts +0 -19
  115. package/src/artifacts/urlToArtifact.test.ts +0 -23
  116. package/src/artifacts/urlToArtifact.ts +0 -19
  117. package/src/auth/AGENTS.md +0 -11
  118. package/src/auth/config.test.ts +0 -132
  119. package/src/auth/config.ts +0 -186
  120. package/src/auth/tokens.test.ts +0 -58
  121. package/src/auth/tokens.ts +0 -229
  122. package/src/chunking/AGENTS.md +0 -11
  123. package/src/chunking/ArtifactBatcher.test.ts +0 -22
  124. package/src/chunking/ArtifactBatcher.ts +0 -110
  125. package/src/chunking/ArtifactSplitter.test.ts +0 -38
  126. package/src/chunking/ArtifactSplitter.ts +0 -151
  127. package/src/debug/AGENTS.md +0 -79
  128. package/src/debug/logger.test.ts +0 -244
  129. package/src/debug/logger.ts +0 -211
  130. package/src/extract.test.ts +0 -22
  131. package/src/extract.ts +0 -150
  132. package/src/fields.test.ts +0 -681
  133. package/src/fields.ts +0 -246
  134. package/src/index.test.ts +0 -20
  135. package/src/index.ts +0 -110
  136. package/src/llm/AGENTS.md +0 -9
  137. package/src/llm/LLMClient.test.ts +0 -394
  138. package/src/llm/LLMClient.ts +0 -264
  139. package/src/llm/RetryingRunner.test.ts +0 -174
  140. package/src/llm/RetryingRunner.ts +0 -270
  141. package/src/llm/message.test.ts +0 -42
  142. package/src/llm/message.ts +0 -47
  143. package/src/llm/models.test.ts +0 -82
  144. package/src/llm/models.ts +0 -190
  145. package/src/llm/resolveModel.ts +0 -86
  146. package/src/merge/AGENTS.md +0 -6
  147. package/src/merge/Deduplicator.test.ts +0 -108
  148. package/src/merge/Deduplicator.ts +0 -45
  149. package/src/merge/SmartDataMerger.test.ts +0 -177
  150. package/src/merge/SmartDataMerger.ts +0 -56
  151. package/src/parsers/AGENTS.md +0 -58
  152. package/src/parsers/collect.test.ts +0 -56
  153. package/src/parsers/collect.ts +0 -31
  154. package/src/parsers/mime.test.ts +0 -91
  155. package/src/parsers/mime.ts +0 -137
  156. package/src/parsers/npm.ts +0 -26
  157. package/src/parsers/pdf.test.ts +0 -394
  158. package/src/parsers/pdf.ts +0 -194
  159. package/src/parsers/runner.test.ts +0 -95
  160. package/src/parsers/runner.ts +0 -177
  161. package/src/parsers/types.ts +0 -29
  162. package/src/prompts/AGENTS.md +0 -8
  163. package/src/prompts/DeduplicationPrompt.test.ts +0 -41
  164. package/src/prompts/DeduplicationPrompt.ts +0 -37
  165. package/src/prompts/ExtractorPrompt.test.ts +0 -21
  166. package/src/prompts/ExtractorPrompt.ts +0 -72
  167. package/src/prompts/ParallelMergerPrompt.test.ts +0 -8
  168. package/src/prompts/ParallelMergerPrompt.ts +0 -37
  169. package/src/prompts/SequentialExtractorPrompt.test.ts +0 -24
  170. package/src/prompts/SequentialExtractorPrompt.ts +0 -82
  171. package/src/prompts/formatArtifacts.test.ts +0 -39
  172. package/src/prompts/formatArtifacts.ts +0 -46
  173. package/src/strategies/AGENTS.md +0 -6
  174. package/src/strategies/DoublePassAutoMergeStrategy.test.ts +0 -53
  175. package/src/strategies/DoublePassAutoMergeStrategy.ts +0 -410
  176. package/src/strategies/DoublePassStrategy.test.ts +0 -48
  177. package/src/strategies/DoublePassStrategy.ts +0 -266
  178. package/src/strategies/ParallelAutoMergeStrategy.test.ts +0 -152
  179. package/src/strategies/ParallelAutoMergeStrategy.ts +0 -345
  180. package/src/strategies/ParallelStrategy.test.ts +0 -61
  181. package/src/strategies/ParallelStrategy.ts +0 -208
  182. package/src/strategies/SequentialAutoMergeStrategy.test.ts +0 -66
  183. package/src/strategies/SequentialAutoMergeStrategy.ts +0 -325
  184. package/src/strategies/SequentialStrategy.test.ts +0 -53
  185. package/src/strategies/SequentialStrategy.ts +0 -142
  186. package/src/strategies/SimpleStrategy.test.ts +0 -46
  187. package/src/strategies/SimpleStrategy.ts +0 -94
  188. package/src/strategies/concurrency.test.ts +0 -16
  189. package/src/strategies/concurrency.ts +0 -14
  190. package/src/strategies/index.test.ts +0 -20
  191. package/src/strategies/utils.test.ts +0 -76
  192. package/src/strategies/utils.ts +0 -95
  193. package/src/tokenization.test.ts +0 -119
  194. package/src/tokenization.ts +0 -71
  195. package/src/types.test.ts +0 -25
  196. package/src/types.ts +0 -174
  197. package/src/validation/AGENTS.md +0 -7
  198. package/src/validation/validator.test.ts +0 -204
  199. package/src/validation/validator.ts +0 -90
  200. package/tsconfig.json +0 -22
package/src/llm/models.ts DELETED
@@ -1,190 +0,0 @@
1
- import type { ProviderModelsResult } from "../types";
2
- import { resolveProviderEnvVar, resolveProviderToken } from "../auth/tokens";
3
-
4
- const openAiModelsUrl = "https://api.openai.com/v1/models";
5
- const anthropicModelsUrl = "https://api.anthropic.com/v1/models";
6
- const googleModelsUrl = "https://generativelanguage.googleapis.com/v1beta/models";
7
- const openRouterModelsUrl = "https://openrouter.ai/api/v1/models";
8
-
9
/**
 * Looks up the API token for a provider.
 *
 * A non-empty value in the provider's environment variable takes precedence;
 * otherwise the lookup is delegated to `resolveProviderToken`.
 */
const getTokenForProvider = async (provider: string) => {
  const envVarName = resolveProviderEnvVar(provider);
  const fromEnvironment = envVarName ? process.env[envVarName] : undefined;
  if (fromEnvironment) {
    return fromEnvironment;
  }
  return await resolveProviderToken(provider);
};
16
-
17
/**
 * Collects every string found at `fieldKey` on the entries of the array stored
 * at `listKey` in an untrusted JSON payload.
 *
 * The payload comes straight from `response.json()`, so nothing about its shape
 * is assumed: a non-object payload, a missing or non-array list, and `null` or
 * non-object entries all contribute nothing instead of throwing.
 */
const collectStringField = (json: unknown, listKey: string, fieldKey: string): string[] => {
  if (typeof json !== "object" || json === null) {
    return [];
  }
  const list = (json as Record<string, unknown>)[listKey];
  if (!Array.isArray(list)) {
    return [];
  }
  const values: string[] = [];
  for (const entry of list as unknown[]) {
    if (typeof entry !== "object" || entry === null) {
      continue;
    }
    const value = (entry as Record<string, unknown>)[fieldKey];
    if (typeof value === "string") {
      values.push(value);
    }
  }
  return values;
};

/** Extracts model ids from an OpenAI-style `{ data: [{ id }] }` listing. */
const parseOpenAiModels = (json: unknown): string[] => collectStringField(json, "data", "id");

/** Extracts model ids from an Anthropic-style `{ data: [{ id }] }` listing. */
const parseAnthropicModels = (json: unknown): string[] => collectStringField(json, "data", "id");

/**
 * Extracts model names from a Google-style `{ models: [{ name }] }` listing,
 * stripping the leading `models/` prefix from each name.
 */
const parseGoogleModels = (json: unknown): string[] =>
  collectStringField(json, "models", "name").map((name) => name.replace(/^models\//, ""));

/** Extracts model ids from an OpenRouter-style `{ data: [{ id }] }` listing. */
const parseOpenRouterModels = (json: unknown): string[] => collectStringField(json, "data", "id");
39
-
40
/** Models returned for OpenCode, which doesn't have a public models endpoint. */
const openCodeKnownModels = [
  "gpt-5.2",
  "gpt-5.2-codex",
  "gpt-5.1",
  "gpt-5.1-codex",
  "gpt-5.1-codex-max",
  "gpt-5.1-codex-mini",
  "gpt-5",
  "gpt-5-codex",
  "gpt-5-nano",
  "claude-opus-4-6",
  "claude-opus-4-5",
  "claude-opus-4-1",
  "claude-sonnet-4-6",
  "claude-sonnet-4-5",
  "claude-sonnet-4",
  "claude-haiku-4-5",
  "claude-haiku-3.5",
  "gemini-3.1-pro",
  "gemini-3-pro",
  "gemini-3-flash",
  "minimax-m2.5",
  "minimax-m2.5-free",
  "minimax-m2.1",
  "glm-5",
  "glm-5-free",
  "glm-4.7",
  "glm-4.6",
  "kimi-k2.5",
  "kimi-k2.5-free",
  "kimi-k2-thinking",
  "kimi-k2",
  "qwen3-coder",
  "big-pickle",
];

/**
 * GETs `url` (optionally with request headers) and returns the decoded JSON body.
 *
 * @throws Error carrying the response body text when the response is not OK.
 *   If that body is empty, the message falls back to the HTTP status so the
 *   caller never receives an empty error message.
 */
const fetchModelsJson = async (url: string, headers?: Record<string, string>): Promise<unknown> => {
  const response = await fetch(url, headers ? { headers } : undefined);
  if (!response.ok) {
    const body = await response.text();
    const fallback = `Request failed with status ${response.status} ${response.statusText}`.trim();
    throw new Error(body || fallback);
  }
  return (await response.json()) as unknown;
};

/**
 * Lists the model identifiers a provider exposes.
 *
 * `openai`, `anthropic`, `google` and `openrouter` are queried over HTTP with
 * the given token; `opencode` returns a fixed list and ignores the token.
 *
 * @param provider Provider key.
 * @param token Credential sent to the provider's models endpoint.
 * @returns Model identifiers as parsed from the provider response.
 * @throws Error when the provider is unsupported or the HTTP response is not OK.
 */
const requestModels = async (provider: string, token: string): Promise<string[]> => {
  switch (provider) {
    case "openai":
      return parseOpenAiModels(
        await fetchModelsJson(openAiModelsUrl, { Authorization: `Bearer ${token}` }),
      );

    case "anthropic":
      return parseAnthropicModels(
        await fetchModelsJson(anthropicModelsUrl, {
          "x-api-key": token,
          "anthropic-version": "2023-06-01",
        }),
      );

    case "google":
      // The Google endpoint takes the key as a query parameter rather than a header.
      return parseGoogleModels(
        await fetchModelsJson(`${googleModelsUrl}?key=${encodeURIComponent(token)}`),
      );

    case "openrouter":
      return parseOpenRouterModels(
        await fetchModelsJson(openRouterModelsUrl, { Authorization: `Bearer ${token}` }),
      );

    case "opencode":
      // Hand out a copy so callers cannot mutate the shared list.
      return [...openCodeKnownModels];

    default:
      throw new Error(`Unsupported provider: ${provider}`);
  }
};
127
-
128
/**
 * Per-provider model preferences, in the order `pickCheapestModel` tries them.
 * Each entry is either an exact model id or a prefix that must be followed by
 * `-` (see `matchesPreference`).
 */
const cheapestModelPreferences: Record<string, string[]> = {
  openai: [
    "gpt-4.1-nano",
    "gpt-4.1-mini",
    "gpt-4o-mini",
    "gpt-4o",
  ],
  anthropic: [
    "claude-3-5-haiku",
    "claude-3-haiku",
  ],
  google: [
    "gemini-1.5-flash-8b",
    "gemini-1.5-flash",
    "gemini-2.0-flash",
    "gemini-1.5-pro",
  ],
  // NOTE(review): "kimi-k2-free" matches none of the ids in the hard-coded
  // opencode list (which has "kimi-k2.5-free") — confirm whether this is a typo.
  opencode: [
    "gpt-5-nano",
    "claude-haiku-3.5",
    "gemini-3-flash",
    "kimi-k2-free",
    "glm-5-free",
    "minimax-m2.5-free",
  ],
  openrouter: [
    "openai/gpt-4o-mini",
    "anthropic/claude-3.5-haiku",
    "google/gemini-flash-1.5",
  ],
};

/**
 * Returns true when `model` is exactly `preference`, or is `preference`
 * followed by a `-` suffix (e.g. a dated variant of the same model).
 */
const matchesPreference = (model: string, preference: string) => {
  if (model === preference) {
    return true;
  }
  return model.startsWith(preference + "-");
};
139
-
140
/**
 * Lists the models available for one provider, reporting failure in the result
 * rather than by throwing.
 *
 * Token resolution happens inside the `try` so that a rejected token lookup is
 * reported as `{ ok: false }` like every other failure, instead of escaping
 * and rejecting the `Promise.all` in `listAllProviderModels`.
 *
 * @param provider Provider key.
 * @returns `{ ok: true, models }` on success; `{ ok: false, error }` when no
 *   token is available or when token lookup / model listing throws.
 */
export const listProviderModels = async (provider: string): Promise<ProviderModelsResult> => {
  try {
    const token = await getTokenForProvider(provider);
    if (!token) {
      return { provider, ok: false, error: "No token available" };
    }

    const models = await requestModels(provider, token);
    return { provider, ok: true, models };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return { provider, ok: false, error: message };
  }
};
154
-
155
/**
 * Lists models for several providers concurrently.
 *
 * @param providers Provider keys to query.
 * @returns One result per provider, in the same order as `providers`.
 */
export const listAllProviderModels = async (providers: string[]) => {
  const pending = providers.map((name) => listProviderModels(name));
  return Promise.all(pending);
};
159
-
160
/**
 * Chooses a model for a provider from the ids it offers.
 *
 * Preferences for the provider are tried in order; for each one the first
 * offered model that matches wins. When nothing matches (or the provider has
 * no preference list) the first offered model is returned, which is
 * `undefined` for an empty list.
 */
export const pickCheapestModel = (provider: string, models: string[]) => {
  const ranked = cheapestModelPreferences[provider] ?? [];
  for (const wanted of ranked) {
    const hit = models.find((candidate) => matchesPreference(candidate, wanted));
    if (!hit) {
      continue;
    }
    return hit;
  }
  return models[0];
};
170
-
171
/**
 * Resolves the preferred low-cost model for a provider by listing its models
 * and picking one with `pickCheapestModel`.
 *
 * @param provider Provider key.
 * @returns The chosen model id.
 * @throws Error when the listing fails or yields no usable model.
 */
export const resolveCheapestModel = async (provider: string) => {
  const result = await listProviderModels(provider);
  if (!result.ok) {
    // `||` rather than `??`: a failed listing can carry an empty error string
    // (e.g. an HTTP error with an empty body), which must still produce a
    // meaningful message instead of `Error("")`.
    throw new Error(result.error || `Unable to list models for provider: ${provider}`);
  }

  const model = pickCheapestModel(provider, result.models ?? []);
  if (!model) {
    throw new Error(`No models available for provider: ${provider}`);
  }
  return model;
};

/** Internal helpers exposed for unit tests only. */
export const __testing__ = {
  parseOpenAiModels,
  parseAnthropicModels,
  parseGoogleModels,
  parseOpenRouterModels,
  pickCheapestModel,
};
@@ -1,86 +0,0 @@
1
- import { resolveProviderEnvVar, resolveProviderToken } from "../auth/tokens";
2
-
3
/**
 * Turns a `provider/model` string into a model instance from the matching
 * AI SDK provider package (loaded lazily via dynamic import).
 *
 * Side effects visible in this function: it defaults `AI_SDK_LOG_WARNINGS`
 * (on `globalThis` and in `process.env`) when unset, and, when the provider's
 * environment variable is empty, copies a stored token into `process.env`.
 *
 * For `openrouter`, a `#suffix` on the model name is split off and attached to
 * the returned instance as the non-enumerable `__openrouter_provider` property.
 *
 * @param model Identifier in `provider/model` form; everything after the first
 *   `/` is the model name.
 * @throws Error on a malformed identifier, an unsupported provider, or a
 *   missing OpenCode API key.
 */
export const resolveModel = async (model: string) => {
  (globalThis as { AI_SDK_LOG_WARNINGS?: boolean }).AI_SDK_LOG_WARNINGS ??= false;
  process.env.AI_SDK_LOG_WARNINGS ??= "false";

  // Split on the first "/" only; later slashes belong to the model name.
  const slashAt = model.indexOf("/");
  const provider = slashAt < 0 ? model : model.slice(0, slashAt);
  const modelName = slashAt < 0 ? "" : model.slice(slashAt + 1);

  if (!provider || !modelName) {
    throw new Error(`Invalid model format: ${model}. Expected format: provider/model (e.g., openai/gpt-4)`);
  }

  // Make a stored token visible through the provider's environment variable.
  const envVar = resolveProviderEnvVar(provider);
  if (envVar && !process.env[envVar]) {
    const storedToken = await resolveProviderToken(provider);
    if (storedToken) {
      process.env[envVar] = storedToken;
    }
  }

  switch (provider) {
    case "openai": {
      const sdk = await import("@ai-sdk/openai");
      return sdk.openai(modelName);
    }
    case "anthropic": {
      const sdk = await import("@ai-sdk/anthropic");
      return sdk.anthropic(modelName);
    }
    case "google": {
      const sdk = await import("@ai-sdk/google");
      return sdk.google(modelName);
    }
    case "opencode": {
      const openCodeEnvVar = resolveProviderEnvVar("opencode");
      let apiKey = openCodeEnvVar ? process.env[openCodeEnvVar] : undefined;
      if (!apiKey) {
        apiKey = await resolveProviderToken("opencode");
      }
      if (!apiKey) {
        throw new Error("OpenCode API key is required. Set OPENCODE_API_KEY environment variable or run 'struktur auth set --provider opencode --token <token>'");
      }

      // All OpenCode traffic uses one base URL; the model-name prefix selects
      // which SDK client is pointed at it.
      const settings = { apiKey, baseURL: "https://opencode.ai/zen/v1" };

      if (modelName.startsWith("claude-")) {
        const { createAnthropic } = await import("@ai-sdk/anthropic");
        return createAnthropic(settings)(modelName);
      }
      if (modelName.startsWith("gemini-")) {
        const { createGoogleGenerativeAI } = await import("@ai-sdk/google");
        return createGoogleGenerativeAI(settings)(modelName);
      }
      const { createOpenAI } = await import("@ai-sdk/openai");
      return createOpenAI(settings)(modelName);
    }
    case "openrouter": {
      const { openrouter } = await import("@openrouter/ai-sdk-provider");
      const hashAt = modelName.indexOf("#");
      const hasSuffix = hashAt >= 0;
      const actualModelName = hasSuffix ? modelName.slice(0, hashAt) : modelName;
      const preferredProvider = hasSuffix ? modelName.slice(hashAt + 1) : undefined;

      const modelInstance = openrouter(actualModelName);

      // An empty suffix ("model#") is treated the same as no suffix.
      if (preferredProvider) {
        Object.defineProperty(modelInstance, "__openrouter_provider", {
          value: preferredProvider,
          writable: false,
          enumerable: false,
          configurable: false,
        });
      }

      return modelInstance;
    }
    default:
      throw new Error(`Unsupported model provider: ${provider}. Supported providers: openai, anthropic, google, opencode, openrouter`);
  }
};
@@ -1,6 +0,0 @@
1
- Merge module
2
-
3
- - Purpose: schema-aware merging and deduplication of extracted data.
4
- - Key files: `SmartDataMerger.ts`, `Deduplicator.ts`.
5
- - Design: arrays concatenate, objects shallow-merge, scalars prefer new values; dedupe uses 32-bit FNV-1a hashing.
6
- - Tests: `SmartDataMerger.test.ts`, `Deduplicator.test.ts`.
@@ -1,108 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import { findExactDuplicatesWithHashing, deduplicateByIndices, fnv1a32 } from "./Deduplicator";
3
-
4
// --- fnv1a32 -----------------------------------------------------------------

test("fnv1a32: official test vectors from lcn2/fnv", () => {
  // [input, expected 32-bit FNV-1a hash]
  const vectors: Array<[string, number]> = [
    ["", 0x811c9dc5],
    ["a", 0xe40c292c],
    ["b", 0xe70c2de5],
    ["c", 0xe60c2c52],
    ["d", 0xe10c2473],
    ["e", 0xe00c22e0],
    ["f", 0xe30c2799],
    ["fo", 0x6222e842],
    ["foo", 0xa9f37ed7],
    ["foob", 0x3f5076ef],
    ["fooba", 0x39aaa18a],
    ["foobar", 0xbf9cf968],
    ["chongo was here!\n", 0xd49930d5],
  ];

  for (const [input, expected] of vectors) {
    expect(fnv1a32(input)).toBe(expected);
  }
});

test("fnv1a32: consistent results", () => {
  const input = "test string for consistency";
  const first = fnv1a32(input);
  const second = fnv1a32(input);
  const third = fnv1a32(input);

  expect(first).toBe(second);
  expect(second).toBe(third);
});

test("fnv1a32: different strings produce different hashes", () => {
  const inputs = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j"];
  const distinct = new Set(inputs.map((input) => fnv1a32(input)));

  expect(distinct.size).toBe(inputs.length);
});

test("fnv1a32: handles unicode", () => {
  const ascii = fnv1a32("hello");
  const accented = fnv1a32("héllo");
  const cjk = fnv1a32("你好");

  expect(ascii).not.toBe(accented);
  expect(typeof cjk).toBe("number");
  expect(cjk).toBeGreaterThan(0);
});

test("fnv1a32: handles special characters", () => {
  const left = fnv1a32('{"key":"value"}');
  const right = fnv1a32('{"key":"value2"}');

  expect(left).not.toBe(right);
});

test("fnv1a32: returns unsigned 32-bit integer", () => {
  const value = fnv1a32("some test string");

  expect(value).toBeGreaterThanOrEqual(0);
  expect(value).toBeLessThan(4294967296);
  expect(Number.isInteger(value)).toBe(true);
});

test("fnv1a32: collision resistance for similar strings", () => {
  const inputs = [
    "item1", "item2", "item3", "item4", "item5",
    "item6", "item7", "item8", "item9", "item10",
    "Item1", "ITEM1", "itemA", "itemB", "itemC",
  ];
  const distinct = new Set(inputs.map((input) => fnv1a32(input)));

  expect(distinct.size).toBe(inputs.length);
});

// --- duplicate detection -------------------------------------------------------

test("findExactDuplicatesWithHashing finds duplicates", () => {
  const records = [
    { id: 1, name: "A" },
    { id: 1, name: "A" },
    { id: 2, name: "B" },
  ];

  expect(findExactDuplicatesWithHashing(records)).toEqual([1]);
});

test("deduplicateByIndices removes by index", () => {
  const kept = deduplicateByIndices(["a", "b", "c"], [1]);

  expect(kept).toEqual(["a", "c"]);
});

test("findExactDuplicatesWithHashing handles empty array", () => {
  const found = findExactDuplicatesWithHashing([]);

  expect(found).toEqual([]);
});

test("findExactDuplicatesWithHashing handles no duplicates", () => {
  const found = findExactDuplicatesWithHashing([1, 2, 3]);

  expect(found).toEqual([]);
});

test("findExactDuplicatesWithHashing handles all duplicates", () => {
  const found = findExactDuplicatesWithHashing([1, 1, 1]);

  expect(found).toEqual([1, 2]);
});

test("findExactDuplicatesWithHashing handles complex objects", () => {
  const shared = { a: 1, b: { c: 2 } };
  const different = { a: 1, b: { c: 3 } };

  expect(findExactDuplicatesWithHashing([shared, shared, different])).toEqual([1]);
});

test("findExactDuplicatesWithHashing: key order doesn't matter", () => {
  const forward = { a: 1, b: 2 };
  const reversed = { b: 2, a: 1 };

  expect(findExactDuplicatesWithHashing([forward, reversed])).toEqual([1]);
});
@@ -1,45 +0,0 @@
1
/**
 * 32-bit FNV-1a hash of a string.
 *
 * Each UTF-16 code unit (`charCodeAt`) is XORed into the state, so results
 * agree with the byte-oriented reference vectors for ASCII input.
 *
 * @returns The hash as an unsigned 32-bit integer.
 */
export const fnv1a32 = (str: string): number => {
  let hash = 2166136261; // FNV-1a 32-bit offset basis (0x811c9dc5)
  for (let i = 0; i < str.length; i++) {
    hash ^= str.charCodeAt(i);
    hash = Math.imul(hash, 16777619); // FNV 32-bit prime, wrapping multiply
  }
  return hash >>> 0; // reinterpret as unsigned
};

/**
 * Serializes a value to a canonical string in which object keys are ordered,
 * so structurally equal values produce identical text regardless of key
 * insertion order.
 *
 * - Keys are compared by UTF-16 code unit, which is locale-independent and
 *   never reports two distinct keys as equal.
 * - Keys are escaped with `JSON.stringify`, so a key containing `"` cannot
 *   forge the text of a different structure.
 * - Values `JSON.stringify` cannot represent (`undefined`, functions, symbols)
 *   become the token `undefined`, so `[undefined]` stays distinct from `[]`
 *   and a top-level `undefined` no longer yields a non-string.
 */
const stableStringify = (value: unknown): string => {
  if (value === null || typeof value !== "object") {
    return JSON.stringify(value) ?? "undefined";
  }

  if (Array.isArray(value)) {
    return `[${value.map((item) => stableStringify(item)).join(",")}]`;
  }

  const entries = Object.entries(value as Record<string, unknown>)
    .sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0))
    .map(([key, val]) => `${JSON.stringify(key)}:${stableStringify(val)}`);

  return `{${entries.join(",")}}`;
};

/**
 * Finds items that are structural duplicates of an earlier item.
 *
 * Items are bucketed by the FNV-1a hash of their canonical serialization, and
 * a match is confirmed by comparing the serializations themselves. A 32-bit
 * hash collision between different items is therefore never reported as a
 * duplicate.
 *
 * @param items Values to scan; the first occurrence of each value is kept.
 * @returns Indices of the later occurrences, in ascending order.
 */
export const findExactDuplicatesWithHashing = (items: unknown[]): number[] => {
  const buckets = new Map<number, string[]>();
  const duplicates: number[] = [];

  items.forEach((item, index) => {
    const canonical = stableStringify(item);
    const hash = fnv1a32(canonical);
    const bucket = buckets.get(hash);

    if (bucket === undefined) {
      buckets.set(hash, [canonical]);
      return;
    }
    if (bucket.includes(canonical)) {
      duplicates.push(index);
      return;
    }
    // Same hash, different content: a genuine collision, keep both items.
    bucket.push(canonical);
  });

  return duplicates;
};

/**
 * Returns a copy of `items` without the elements at the given indices.
 *
 * @param items Source array (not mutated).
 * @param indices Positions to drop; out-of-range values are ignored.
 */
export const deduplicateByIndices = <T>(items: T[], indices: number[]) => {
  const remove = new Set(indices);
  return items.filter((_, index) => !remove.has(index));
};
@@ -1,177 +0,0 @@
1
- import { test, expect } from "bun:test";
2
- import { SmartDataMerger } from "./SmartDataMerger";
3
-
4
/** Wraps a property map in a top-level object schema. */
const objectSchema = (properties: Record<string, unknown>) => ({
  type: "object",
  properties,
});

/** Merges `incoming` into `current` under a schema with the given properties. */
const mergeUnder = (
  properties: Record<string, unknown>,
  current: Record<string, unknown>,
  incoming: Record<string, unknown>,
) => new SmartDataMerger(objectSchema(properties)).merge(current, incoming);

test("SmartDataMerger concatenates arrays and preserves scalars", () => {
  const merged = mergeUnder(
    { items: { type: "array" }, title: { type: "string" } },
    { items: [1], title: "A" },
    { items: [2], title: "" },
  );

  expect(merged.items).toEqual([1, 2]);
  expect(merged.title).toBe("A");
});

test("SmartDataMerger merges nested objects", () => {
  const userSchema = {
    type: "object",
    properties: {
      name: { type: "string" },
      email: { type: "string" },
    },
  };

  const merged = mergeUnder(
    { user: userSchema },
    { user: { name: "Alice" } },
    { user: { email: "alice@example.com" } },
  );

  expect(merged.user).toEqual({ name: "Alice", email: "alice@example.com" });
});

test("SmartDataMerger prefers new scalar values when not empty", () => {
  const merged = mergeUnder(
    { title: { type: "string" }, count: { type: "number" } },
    { title: "Old", count: 1 },
    { title: "New", count: 2 },
  );

  expect(merged.title).toBe("New");
  expect(merged.count).toBe(2);
});

test("SmartDataMerger preserves old value when new is null", () => {
  const merged = mergeUnder({ title: { type: "string" } }, { title: "Old" }, { title: null });

  expect(merged.title).toBe("Old");
});

test("SmartDataMerger preserves old value when new is undefined", () => {
  const merged = mergeUnder({ title: { type: "string" } }, { title: "Old" }, {});

  expect(merged.title).toBe("Old");
});

test("SmartDataMerger handles missing current value for arrays", () => {
  const merged = mergeUnder({ items: { type: "array" } }, {}, { items: [1, 2] });

  expect(merged.items).toEqual([1, 2]);
});

test("SmartDataMerger handles missing new value for arrays", () => {
  const merged = mergeUnder({ items: { type: "array" } }, { items: [1, 2] }, {});

  expect(merged.items).toEqual([1, 2]);
});

test("SmartDataMerger handles non-array values for array schema", () => {
  const merged = mergeUnder({ items: { type: "array" } }, { items: "not-an-array" }, { items: [1] });

  expect(merged.items).toEqual([1]);
});

test("SmartDataMerger handles non-object values for object schema", () => {
  const merged = mergeUnder(
    { user: { type: "object", properties: {} } },
    { user: "not-an-object" },
    { user: { name: "Alice" } },
  );

  expect(merged.user).toEqual({ name: "Alice" });
});

test("SmartDataMerger preserves properties not in schema", () => {
  const merged = mergeUnder(
    { title: { type: "string" } },
    { title: "A", extra: "preserved" },
    { title: "B" },
  );

  expect(merged.title).toBe("B");
  expect(merged.extra).toBe("preserved");
});

test("SmartDataMerger handles empty schema properties", () => {
  const merged = mergeUnder({}, { title: "A" }, { title: "B" });

  expect(merged.title).toBe("A");
});
@@ -1,56 +0,0 @@
1
- import type { AnyJSONSchema } from "../types";
2
-
3
/** True when the property schema declares `type: "array"`. */
const isArraySchema = (schema: Record<string, unknown>) => {
  return schema.type === "array";
};

/** True when the property schema declares `type: "object"` together with a `properties` member. */
const isObjectSchema = (schema: Record<string, unknown>) => {
  return schema.type === "object" && typeof schema.properties === "object";
};

/**
 * True for non-null, non-array objects. Arrays are excluded because
 * `typeof [] === "object"`, and spreading an array into an object literal
 * would copy its indices as keys.
 */
const isMergeableObject = (value: unknown): value is Record<string, unknown> => {
  return typeof value === "object" && value !== null && !Array.isArray(value);
};

/**
 * Merges two extraction results according to the top-level properties of a
 * JSON schema.
 *
 * For each property listed in the schema:
 * - array schemas concatenate the current and new arrays (non-array values
 *   count as empty);
 * - object schemas shallow-merge the current and new objects (non-object
 *   values, including arrays, count as empty);
 * - everything else takes the new value unless it is `undefined`, `null` or
 *   `""`, in which case the current value is kept.
 *
 * Keys of the current data that the schema does not list are carried over
 * unchanged; keys that appear only in the new data are ignored.
 */
export class SmartDataMerger {
  private schema: AnyJSONSchema;

  constructor(schema: AnyJSONSchema) {
    this.schema = schema;
  }

  /**
   * @param currentData Data accumulated so far (not mutated).
   * @param newData Newly extracted data to fold in (not mutated).
   * @returns A new merged object.
   */
  merge(currentData: Record<string, unknown>, newData: Record<string, unknown>) {
    const merged: Record<string, unknown> = { ...currentData };
    const properties =
      (this.schema as { properties?: Record<string, Record<string, unknown>> })
        .properties ?? {};

    for (const [key, propSchema] of Object.entries(properties)) {
      const currentValue = currentData[key];
      const newValue = newData[key];

      if (isArraySchema(propSchema)) {
        merged[key] = [
          ...(Array.isArray(currentValue) ? currentValue : []),
          ...(Array.isArray(newValue) ? newValue : []),
        ];
        continue;
      }

      if (isObjectSchema(propSchema)) {
        merged[key] = {
          ...(isMergeableObject(currentValue) ? currentValue : {}),
          ...(isMergeableObject(newValue) ? newValue : {}),
        };
        continue;
      }

      // Scalars: an "empty" new value never overwrites existing data.
      if (newValue !== undefined && newValue !== null && newValue !== "") {
        merged[key] = newValue;
      } else if (currentValue !== undefined) {
        merged[key] = currentValue;
      }
    }

    return merged;
  }
}