observability-toolkit 1.8.0 → 1.8.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. package/README.md +81 -3
  2. package/dist/backends/index.d.ts +119 -0
  3. package/dist/backends/index.d.ts.map +1 -1
  4. package/dist/backends/index.js +57 -0
  5. package/dist/backends/index.js.map +1 -1
  6. package/dist/backends/index.test.d.ts +5 -0
  7. package/dist/backends/index.test.d.ts.map +1 -0
  8. package/dist/backends/index.test.js +156 -0
  9. package/dist/backends/index.test.js.map +1 -0
  10. package/dist/backends/local-jsonl-boolean-search.test.js +8 -27
  11. package/dist/backends/local-jsonl-boolean-search.test.js.map +1 -1
  12. package/dist/backends/local-jsonl-logs.test.d.ts +2 -0
  13. package/dist/backends/local-jsonl-logs.test.d.ts.map +1 -0
  14. package/dist/backends/local-jsonl-logs.test.js +603 -0
  15. package/dist/backends/local-jsonl-logs.test.js.map +1 -0
  16. package/dist/backends/local-jsonl-traces.test.d.ts +2 -0
  17. package/dist/backends/local-jsonl-traces.test.d.ts.map +1 -0
  18. package/dist/backends/local-jsonl-traces.test.js +1723 -0
  19. package/dist/backends/local-jsonl-traces.test.js.map +1 -0
  20. package/dist/backends/local-jsonl.d.ts +4 -1
  21. package/dist/backends/local-jsonl.d.ts.map +1 -1
  22. package/dist/backends/local-jsonl.js +185 -1
  23. package/dist/backends/local-jsonl.js.map +1 -1
  24. package/dist/backends/local-jsonl.test.js +723 -46
  25. package/dist/backends/local-jsonl.test.js.map +1 -1
  26. package/dist/backends/signoz-api.d.ts +32 -0
  27. package/dist/backends/signoz-api.d.ts.map +1 -1
  28. package/dist/backends/signoz-api.js +231 -33
  29. package/dist/backends/signoz-api.js.map +1 -1
  30. package/dist/backends/signoz-api.test.js +410 -63
  31. package/dist/backends/signoz-api.test.js.map +1 -1
  32. package/dist/lib/constants.d.ts +59 -0
  33. package/dist/lib/constants.d.ts.map +1 -1
  34. package/dist/lib/constants.js +252 -6
  35. package/dist/lib/constants.js.map +1 -1
  36. package/dist/lib/constants.test.js +357 -21
  37. package/dist/lib/constants.test.js.map +1 -1
  38. package/dist/lib/edge-cases.test.d.ts +11 -0
  39. package/dist/lib/edge-cases.test.d.ts.map +1 -0
  40. package/dist/lib/edge-cases.test.js +634 -0
  41. package/dist/lib/edge-cases.test.js.map +1 -0
  42. package/dist/lib/error-sanitizer.d.ts +57 -0
  43. package/dist/lib/error-sanitizer.d.ts.map +1 -0
  44. package/dist/lib/error-sanitizer.js +207 -0
  45. package/dist/lib/error-sanitizer.js.map +1 -0
  46. package/dist/lib/error-sanitizer.test.d.ts +8 -0
  47. package/dist/lib/error-sanitizer.test.d.ts.map +1 -0
  48. package/dist/lib/error-sanitizer.test.js +369 -0
  49. package/dist/lib/error-sanitizer.test.js.map +1 -0
  50. package/dist/lib/file-utils.d.ts +134 -0
  51. package/dist/lib/file-utils.d.ts.map +1 -1
  52. package/dist/lib/file-utils.js +395 -9
  53. package/dist/lib/file-utils.js.map +1 -1
  54. package/dist/lib/file-utils.test.js +444 -3
  55. package/dist/lib/file-utils.test.js.map +1 -1
  56. package/dist/lib/indexer.d.ts +9 -1
  57. package/dist/lib/indexer.d.ts.map +1 -1
  58. package/dist/lib/indexer.js +51 -2
  59. package/dist/lib/indexer.js.map +1 -1
  60. package/dist/lib/indexer.test.js +138 -20
  61. package/dist/lib/indexer.test.js.map +1 -1
  62. package/dist/lib/input-validator.d.ts +103 -0
  63. package/dist/lib/input-validator.d.ts.map +1 -0
  64. package/dist/lib/input-validator.js +250 -0
  65. package/dist/lib/input-validator.js.map +1 -0
  66. package/dist/lib/input-validator.test.d.ts +2 -0
  67. package/dist/lib/input-validator.test.d.ts.map +1 -0
  68. package/dist/lib/input-validator.test.js +287 -0
  69. package/dist/lib/input-validator.test.js.map +1 -0
  70. package/dist/lib/query-sanitizer.d.ts +143 -0
  71. package/dist/lib/query-sanitizer.d.ts.map +1 -0
  72. package/dist/lib/query-sanitizer.js +261 -0
  73. package/dist/lib/query-sanitizer.js.map +1 -0
  74. package/dist/lib/query-sanitizer.test.d.ts +5 -0
  75. package/dist/lib/query-sanitizer.test.d.ts.map +1 -0
  76. package/dist/lib/query-sanitizer.test.js +400 -0
  77. package/dist/lib/query-sanitizer.test.js.map +1 -0
  78. package/dist/lib/server-utils.d.ts +80 -0
  79. package/dist/lib/server-utils.d.ts.map +1 -0
  80. package/dist/lib/server-utils.js +141 -0
  81. package/dist/lib/server-utils.js.map +1 -0
  82. package/dist/lib/shared-schemas.d.ts +59 -0
  83. package/dist/lib/shared-schemas.d.ts.map +1 -0
  84. package/dist/lib/shared-schemas.js +58 -0
  85. package/dist/lib/shared-schemas.js.map +1 -0
  86. package/dist/lib/shared-schemas.test.d.ts +5 -0
  87. package/dist/lib/shared-schemas.test.d.ts.map +1 -0
  88. package/dist/lib/shared-schemas.test.js +106 -0
  89. package/dist/lib/shared-schemas.test.js.map +1 -0
  90. package/dist/lib/toon-encoder.d.ts +21 -0
  91. package/dist/lib/toon-encoder.d.ts.map +1 -0
  92. package/dist/lib/toon-encoder.js +46 -0
  93. package/dist/lib/toon-encoder.js.map +1 -0
  94. package/dist/server.d.ts +1 -1
  95. package/dist/server.d.ts.map +1 -1
  96. package/dist/server.js +155 -81
  97. package/dist/server.js.map +1 -1
  98. package/dist/server.test.js +363 -0
  99. package/dist/server.test.js.map +1 -1
  100. package/dist/test-helpers/env-utils.d.ts +65 -0
  101. package/dist/test-helpers/env-utils.d.ts.map +1 -0
  102. package/dist/test-helpers/env-utils.js +94 -0
  103. package/dist/test-helpers/env-utils.js.map +1 -0
  104. package/dist/test-helpers/file-utils.d.ts +93 -0
  105. package/dist/test-helpers/file-utils.d.ts.map +1 -0
  106. package/dist/test-helpers/file-utils.js +206 -0
  107. package/dist/test-helpers/file-utils.js.map +1 -0
  108. package/dist/test-helpers/index.d.ts +10 -0
  109. package/dist/test-helpers/index.d.ts.map +1 -0
  110. package/dist/test-helpers/index.js +28 -0
  111. package/dist/test-helpers/index.js.map +1 -0
  112. package/dist/test-helpers/mock-backends.d.ts +139 -0
  113. package/dist/test-helpers/mock-backends.d.ts.map +1 -0
  114. package/dist/test-helpers/mock-backends.js +227 -0
  115. package/dist/test-helpers/mock-backends.js.map +1 -0
  116. package/dist/test-helpers/mock-backends.test.d.ts +5 -0
  117. package/dist/test-helpers/mock-backends.test.d.ts.map +1 -0
  118. package/dist/test-helpers/mock-backends.test.js +368 -0
  119. package/dist/test-helpers/mock-backends.test.js.map +1 -0
  120. package/dist/test-helpers/schema-validators.d.ts +32 -0
  121. package/dist/test-helpers/schema-validators.d.ts.map +1 -0
  122. package/dist/test-helpers/schema-validators.js +125 -0
  123. package/dist/test-helpers/schema-validators.js.map +1 -0
  124. package/dist/test-helpers/test-data-builders.d.ts +223 -0
  125. package/dist/test-helpers/test-data-builders.d.ts.map +1 -0
  126. package/dist/test-helpers/test-data-builders.js +288 -0
  127. package/dist/test-helpers/test-data-builders.js.map +1 -0
  128. package/dist/test-helpers/test-data-builders.test.d.ts +2 -0
  129. package/dist/test-helpers/test-data-builders.test.d.ts.map +1 -0
  130. package/dist/test-helpers/test-data-builders.test.js +306 -0
  131. package/dist/test-helpers/test-data-builders.test.js.map +1 -0
  132. package/dist/test-helpers/tool-validators.d.ts +28 -0
  133. package/dist/test-helpers/tool-validators.d.ts.map +1 -0
  134. package/dist/test-helpers/tool-validators.js +56 -0
  135. package/dist/test-helpers/tool-validators.js.map +1 -0
  136. package/dist/tools/context-stats.d.ts +1 -0
  137. package/dist/tools/context-stats.d.ts.map +1 -1
  138. package/dist/tools/context-stats.js +9 -5
  139. package/dist/tools/context-stats.js.map +1 -1
  140. package/dist/tools/context-stats.test.js +24 -10
  141. package/dist/tools/context-stats.test.js.map +1 -1
  142. package/dist/tools/get-trace-url.js +2 -2
  143. package/dist/tools/get-trace-url.js.map +1 -1
  144. package/dist/tools/health-check.js +2 -2
  145. package/dist/tools/health-check.js.map +1 -1
  146. package/dist/tools/index.d.ts +1 -0
  147. package/dist/tools/index.d.ts.map +1 -1
  148. package/dist/tools/index.js +1 -0
  149. package/dist/tools/index.js.map +1 -1
  150. package/dist/tools/query-evaluations.d.ts +186 -0
  151. package/dist/tools/query-evaluations.d.ts.map +1 -0
  152. package/dist/tools/query-evaluations.js +351 -0
  153. package/dist/tools/query-evaluations.js.map +1 -0
  154. package/dist/tools/query-evaluations.test.d.ts +5 -0
  155. package/dist/tools/query-evaluations.test.d.ts.map +1 -0
  156. package/dist/tools/query-evaluations.test.js +733 -0
  157. package/dist/tools/query-evaluations.test.js.map +1 -0
  158. package/dist/tools/query-llm-events.d.ts +24 -18
  159. package/dist/tools/query-llm-events.d.ts.map +1 -1
  160. package/dist/tools/query-llm-events.js +103 -60
  161. package/dist/tools/query-llm-events.js.map +1 -1
  162. package/dist/tools/query-llm-events.test.js +271 -9
  163. package/dist/tools/query-llm-events.test.js.map +1 -1
  164. package/dist/tools/query-logs.d.ts +28 -20
  165. package/dist/tools/query-logs.d.ts.map +1 -1
  166. package/dist/tools/query-logs.js +85 -61
  167. package/dist/tools/query-logs.js.map +1 -1
  168. package/dist/tools/query-logs.test.js +74 -145
  169. package/dist/tools/query-logs.test.js.map +1 -1
  170. package/dist/tools/query-metrics.d.ts +20 -20
  171. package/dist/tools/query-metrics.d.ts.map +1 -1
  172. package/dist/tools/query-metrics.js +109 -61
  173. package/dist/tools/query-metrics.js.map +1 -1
  174. package/dist/tools/query-metrics.test.js +26 -61
  175. package/dist/tools/query-metrics.test.js.map +1 -1
  176. package/dist/tools/query-traces.d.ts +24 -22
  177. package/dist/tools/query-traces.d.ts.map +1 -1
  178. package/dist/tools/query-traces.js +95 -70
  179. package/dist/tools/query-traces.js.map +1 -1
  180. package/dist/tools/query-traces.test.js +294 -90
  181. package/dist/tools/query-traces.test.js.map +1 -1
  182. package/dist/tools/setup-claudeignore.js +7 -7
  183. package/dist/tools/setup-claudeignore.js.map +1 -1
  184. package/dist/tools/setup-claudeignore.test.js +4 -25
  185. package/dist/tools/setup-claudeignore.test.js.map +1 -1
  186. package/package.json +3 -4
@@ -1,40 +1,22 @@
1
- import { describe, it, beforeEach, afterEach } from 'node:test';
1
+ import { describe, it, before, after, beforeEach } from 'node:test';
2
2
  import * as assert from 'node:assert';
3
3
  import * as fs from 'fs';
4
4
  import * as path from 'path';
5
- import * as os from 'os';
6
5
  import { LocalJsonlBackend, MultiDirectoryBackend } from './local-jsonl.js';
7
- import { buildAndWriteIndex } from '../lib/indexer.js';
8
- /**
9
- * Test utilities for creating temp test fixtures
10
- */
11
- function createTempDir() {
12
- return fs.mkdtempSync(path.join(os.tmpdir(), 'local-jsonl-test-'));
13
- }
14
- function removeTempDir(dir) {
15
- try {
16
- fs.rmSync(dir, { recursive: true, force: true });
17
- }
18
- catch {
19
- // Ignore cleanup errors
20
- }
21
- }
22
- function writeJsonlFile(filePath, data) {
23
- const content = data.map(item => JSON.stringify(item)).join('\n');
24
- fs.writeFileSync(filePath, content, 'utf-8');
25
- }
26
- function getTestDate() {
27
- return new Date().toISOString().split('T')[0];
28
- }
6
+ import { buildAndWriteIndex, getIndexPath } from '../lib/indexer.js';
7
+ import { createTempDir, removeTempDir, getSharedTempDir, clearTempDir, removeSharedTempDir, writeJsonlFile, getTestDate } from '../test-helpers/file-utils.js';
29
8
  describe('LocalJsonlBackend', () => {
30
9
  let tempDir;
31
10
  let backend;
11
+ before(() => {
12
+ tempDir = getSharedTempDir('LocalJsonlBackend');
13
+ });
32
14
  beforeEach(() => {
33
- tempDir = createTempDir();
15
+ clearTempDir(tempDir);
34
16
  backend = new LocalJsonlBackend(tempDir);
35
17
  });
36
- afterEach(() => {
37
- removeTempDir(tempDir);
18
+ after(() => {
19
+ removeSharedTempDir('LocalJsonlBackend');
38
20
  });
39
21
  describe('queryTraces', () => {
40
22
  it('should read and normalize trace spans from JSONL files', async () => {
@@ -2664,6 +2646,269 @@ describe('LocalJsonlBackend', () => {
2664
2646
  const results = await backend.queryLLMEvents({ provider: 'anthropic-new' });
2665
2647
  assert.strictEqual(results.length, 3);
2666
2648
  });
2649
+ it('should filter OpenAI events by provider', async () => {
2650
+ const today = getTestDate();
2651
+ const mockEvents = [
2652
+ {
2653
+ timestamp: '2026-01-28T10:00:00Z',
2654
+ name: 'llm.completion',
2655
+ attributes: {
2656
+ 'gen_ai.provider.name': 'openai',
2657
+ 'gen_ai.request.model': 'gpt-4o',
2658
+ 'gen_ai.usage.input_tokens': 500,
2659
+ },
2660
+ },
2661
+ {
2662
+ timestamp: '2026-01-28T10:01:00Z',
2663
+ name: 'llm.completion',
2664
+ attributes: {
2665
+ 'gen_ai.provider.name': 'anthropic',
2666
+ 'gen_ai.request.model': 'claude-3-opus',
2667
+ },
2668
+ },
2669
+ ];
2670
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2671
+ const results = await backend.queryLLMEvents({ provider: 'openai' });
2672
+ assert.strictEqual(results.length, 1);
2673
+ assert.strictEqual(results[0].attributes?.['gen_ai.request.model'], 'gpt-4o');
2674
+ });
2675
+ it('should filter Google Gemini events by provider', async () => {
2676
+ const today = getTestDate();
2677
+ const mockEvents = [
2678
+ {
2679
+ timestamp: '2026-01-28T10:00:00Z',
2680
+ name: 'llm.completion',
2681
+ attributes: {
2682
+ 'gen_ai.provider.name': 'gcp.gemini',
2683
+ 'gen_ai.request.model': 'gemini-1.5-pro',
2684
+ },
2685
+ },
2686
+ {
2687
+ timestamp: '2026-01-28T10:01:00Z',
2688
+ name: 'llm.completion',
2689
+ attributes: {
2690
+ 'gen_ai.provider.name': 'gcp.vertex_ai',
2691
+ 'gen_ai.request.model': 'gemini-pro',
2692
+ },
2693
+ },
2694
+ {
2695
+ timestamp: '2026-01-28T10:02:00Z',
2696
+ name: 'llm.completion',
2697
+ attributes: {
2698
+ 'gen_ai.provider.name': 'openai',
2699
+ 'gen_ai.request.model': 'gpt-4',
2700
+ },
2701
+ },
2702
+ ];
2703
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2704
+ const geminiResults = await backend.queryLLMEvents({ provider: 'gcp.gemini' });
2705
+ assert.strictEqual(geminiResults.length, 1);
2706
+ assert.strictEqual(geminiResults[0].attributes?.['gen_ai.request.model'], 'gemini-1.5-pro');
2707
+ const vertexResults = await backend.queryLLMEvents({ provider: 'gcp.vertex_ai' });
2708
+ assert.strictEqual(vertexResults.length, 1);
2709
+ });
2710
+ it('should filter Mistral AI events by provider', async () => {
2711
+ const today = getTestDate();
2712
+ const mockEvents = [
2713
+ {
2714
+ timestamp: '2026-01-28T10:00:00Z',
2715
+ name: 'llm.completion',
2716
+ attributes: {
2717
+ 'gen_ai.provider.name': 'mistral_ai',
2718
+ 'gen_ai.request.model': 'mistral-large',
2719
+ },
2720
+ },
2721
+ {
2722
+ timestamp: '2026-01-28T10:01:00Z',
2723
+ name: 'llm.completion',
2724
+ attributes: {
2725
+ 'gen_ai.provider.name': 'anthropic',
2726
+ 'gen_ai.request.model': 'claude-3',
2727
+ },
2728
+ },
2729
+ ];
2730
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2731
+ const results = await backend.queryLLMEvents({ provider: 'mistral_ai' });
2732
+ assert.strictEqual(results.length, 1);
2733
+ assert.strictEqual(results[0].attributes?.['gen_ai.request.model'], 'mistral-large');
2734
+ });
2735
+ it('should filter AWS Bedrock events by provider', async () => {
2736
+ const today = getTestDate();
2737
+ const mockEvents = [
2738
+ {
2739
+ timestamp: '2026-01-28T10:00:00Z',
2740
+ name: 'llm.completion',
2741
+ attributes: {
2742
+ 'gen_ai.provider.name': 'aws.bedrock',
2743
+ 'gen_ai.request.model': 'anthropic.claude-3-sonnet-20240229-v1:0',
2744
+ },
2745
+ },
2746
+ {
2747
+ timestamp: '2026-01-28T10:01:00Z',
2748
+ name: 'llm.completion',
2749
+ attributes: {
2750
+ 'gen_ai.provider.name': 'anthropic',
2751
+ 'gen_ai.request.model': 'claude-3-sonnet',
2752
+ },
2753
+ },
2754
+ ];
2755
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2756
+ const results = await backend.queryLLMEvents({ provider: 'aws.bedrock' });
2757
+ assert.strictEqual(results.length, 1);
2758
+ assert.ok(results[0].attributes?.['gen_ai.request.model']?.toString().includes('anthropic.claude'));
2759
+ });
2760
+ it('should filter Cohere events by provider', async () => {
2761
+ const today = getTestDate();
2762
+ const mockEvents = [
2763
+ {
2764
+ timestamp: '2026-01-28T10:00:00Z',
2765
+ name: 'llm.completion',
2766
+ attributes: {
2767
+ 'gen_ai.provider.name': 'cohere',
2768
+ 'gen_ai.request.model': 'command-r-plus',
2769
+ },
2770
+ },
2771
+ {
2772
+ timestamp: '2026-01-28T10:01:00Z',
2773
+ name: 'llm.completion',
2774
+ attributes: {
2775
+ 'gen_ai.provider.name': 'openai',
2776
+ 'gen_ai.request.model': 'gpt-4',
2777
+ },
2778
+ },
2779
+ ];
2780
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2781
+ const results = await backend.queryLLMEvents({ provider: 'cohere' });
2782
+ assert.strictEqual(results.length, 1);
2783
+ assert.strictEqual(results[0].attributes?.['gen_ai.request.model'], 'command-r-plus');
2784
+ });
2785
+ it('should filter Groq events by provider', async () => {
2786
+ const today = getTestDate();
2787
+ const mockEvents = [
2788
+ {
2789
+ timestamp: '2026-01-28T10:00:00Z',
2790
+ name: 'llm.completion',
2791
+ attributes: {
2792
+ 'gen_ai.provider.name': 'groq',
2793
+ 'gen_ai.request.model': 'llama-3.3-70b',
2794
+ },
2795
+ },
2796
+ {
2797
+ timestamp: '2026-01-28T10:01:00Z',
2798
+ name: 'llm.completion',
2799
+ attributes: {
2800
+ 'gen_ai.provider.name': 'together_ai',
2801
+ 'gen_ai.request.model': 'llama-3-70b',
2802
+ },
2803
+ },
2804
+ ];
2805
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2806
+ const groqResults = await backend.queryLLMEvents({ provider: 'groq' });
2807
+ assert.strictEqual(groqResults.length, 1);
2808
+ assert.strictEqual(groqResults[0].attributes?.['gen_ai.request.model'], 'llama-3.3-70b');
2809
+ const togetherResults = await backend.queryLLMEvents({ provider: 'together_ai' });
2810
+ assert.strictEqual(togetherResults.length, 1);
2811
+ });
2812
+ it('should filter Ollama local model events by provider', async () => {
2813
+ const today = getTestDate();
2814
+ const mockEvents = [
2815
+ {
2816
+ timestamp: '2026-01-28T10:00:00Z',
2817
+ name: 'llm.completion',
2818
+ attributes: {
2819
+ 'gen_ai.provider.name': 'ollama',
2820
+ 'gen_ai.request.model': 'llama3:8b',
2821
+ },
2822
+ },
2823
+ {
2824
+ timestamp: '2026-01-28T10:01:00Z',
2825
+ name: 'llm.completion',
2826
+ attributes: {
2827
+ 'gen_ai.provider.name': 'openai',
2828
+ 'gen_ai.request.model': 'gpt-4',
2829
+ },
2830
+ },
2831
+ ];
2832
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2833
+ const results = await backend.queryLLMEvents({ provider: 'ollama' });
2834
+ assert.strictEqual(results.length, 1);
2835
+ assert.strictEqual(results[0].attributes?.['gen_ai.request.model'], 'llama3:8b');
2836
+ });
2837
+ it('should filter custom/internal provider events', async () => {
2838
+ const today = getTestDate();
2839
+ const mockEvents = [
2840
+ {
2841
+ timestamp: '2026-01-28T10:00:00Z',
2842
+ name: 'llm.completion',
2843
+ attributes: {
2844
+ 'gen_ai.provider.name': 'custom-internal-llm',
2845
+ 'gen_ai.request.model': 'internal-model-v2',
2846
+ },
2847
+ },
2848
+ {
2849
+ timestamp: '2026-01-28T10:01:00Z',
2850
+ name: 'llm.completion',
2851
+ attributes: {
2852
+ 'gen_ai.provider.name': 'anthropic',
2853
+ 'gen_ai.request.model': 'claude-3',
2854
+ },
2855
+ },
2856
+ ];
2857
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2858
+ const results = await backend.queryLLMEvents({ provider: 'custom-internal-llm' });
2859
+ assert.strictEqual(results.length, 1);
2860
+ assert.strictEqual(results[0].attributes?.['gen_ai.request.model'], 'internal-model-v2');
2861
+ });
2862
+ it('should combine provider and model filters', async () => {
2863
+ const today = getTestDate();
2864
+ const mockEvents = [
2865
+ {
2866
+ timestamp: '2026-01-28T10:00:00Z',
2867
+ name: 'llm.completion',
2868
+ attributes: {
2869
+ 'gen_ai.provider.name': 'openai',
2870
+ 'gen_ai.request.model': 'gpt-4o',
2871
+ },
2872
+ },
2873
+ {
2874
+ timestamp: '2026-01-28T10:01:00Z',
2875
+ name: 'llm.completion',
2876
+ attributes: {
2877
+ 'gen_ai.provider.name': 'openai',
2878
+ 'gen_ai.request.model': 'gpt-4-turbo',
2879
+ },
2880
+ },
2881
+ {
2882
+ timestamp: '2026-01-28T10:02:00Z',
2883
+ name: 'llm.completion',
2884
+ attributes: {
2885
+ 'gen_ai.provider.name': 'anthropic',
2886
+ 'gen_ai.request.model': 'gpt-4o', // Same model name, different provider
2887
+ },
2888
+ },
2889
+ ];
2890
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2891
+ const results = await backend.queryLLMEvents({ provider: 'openai', model: 'gpt-4o' });
2892
+ assert.strictEqual(results.length, 1);
2893
+ assert.strictEqual(results[0].attributes?.['gen_ai.provider.name'], 'openai');
2894
+ assert.strictEqual(results[0].attributes?.['gen_ai.request.model'], 'gpt-4o');
2895
+ });
2896
+ it('should return empty array when provider has no events', async () => {
2897
+ const today = getTestDate();
2898
+ const mockEvents = [
2899
+ {
2900
+ timestamp: '2026-01-28T10:00:00Z',
2901
+ name: 'llm.completion',
2902
+ attributes: {
2903
+ 'gen_ai.provider.name': 'anthropic',
2904
+ 'gen_ai.request.model': 'claude-3',
2905
+ },
2906
+ },
2907
+ ];
2908
+ writeJsonlFile(path.join(tempDir, `llm-events-${today}.jsonl`), mockEvents);
2909
+ const results = await backend.queryLLMEvents({ provider: 'nonexistent-provider' });
2910
+ assert.strictEqual(results.length, 0);
2911
+ });
2667
2912
  it('should filter events by operationName', async () => {
2668
2913
  const today = getTestDate();
2669
2914
  const mockEvents = [
@@ -3175,12 +3420,15 @@ describe('streaming JSONL optimization', () => {
3175
3420
  describe('QueryCache', () => {
3176
3421
  let tempDir;
3177
3422
  let backend;
3423
+ before(() => {
3424
+ tempDir = getSharedTempDir('QueryCache');
3425
+ });
3178
3426
  beforeEach(() => {
3179
- tempDir = createTempDir();
3427
+ clearTempDir(tempDir);
3180
3428
  backend = new LocalJsonlBackend(tempDir);
3181
3429
  });
3182
- afterEach(() => {
3183
- removeTempDir(tempDir);
3430
+ after(() => {
3431
+ removeSharedTempDir('QueryCache');
3184
3432
  });
3185
3433
  describe('caching behavior', () => {
3186
3434
  it('should return cached results on second query with same options', async () => {
@@ -3459,12 +3707,15 @@ describe('QueryCache', () => {
3459
3707
  });
3460
3708
  describe('MultiDirectoryBackend', () => {
3461
3709
  let projectDir;
3710
+ before(() => {
3711
+ projectDir = getSharedTempDir('MultiDirectoryBackend');
3712
+ });
3462
3713
  beforeEach(() => {
3463
- // Create a project directory that will have local telemetry subdirectories
3464
- projectDir = createTempDir();
3714
+ // Clear project directory for each test
3715
+ clearTempDir(projectDir);
3465
3716
  });
3466
- afterEach(() => {
3467
- removeTempDir(projectDir);
3717
+ after(() => {
3718
+ removeSharedTempDir('MultiDirectoryBackend');
3468
3719
  });
3469
3720
  describe('constructor and getDirectories', () => {
3470
3721
  it('should return directories when local telemetry dirs exist', () => {
@@ -3474,8 +3725,9 @@ describe('MultiDirectoryBackend', () => {
3474
3725
  const backend = new MultiDirectoryBackend(projectDir);
3475
3726
  const dirs = backend.getDirectories();
3476
3727
  assert.ok(Array.isArray(dirs));
3477
- // Should include the local telemetry directory
3478
- const localDir = dirs.find(d => d.source === 'local' && d.path === localTelemetry);
3728
+ // Should include the local telemetry directory (resolved path due to symlink protection)
3729
+ const resolvedPath = fs.realpathSync(localTelemetry);
3730
+ const localDir = dirs.find(d => d.source === 'local' && d.path === resolvedPath);
3479
3731
  assert.ok(localDir);
3480
3732
  });
3481
3733
  it('should have name property set to multi-directory', () => {
@@ -3487,7 +3739,9 @@ describe('MultiDirectoryBackend', () => {
3487
3739
  fs.mkdirSync(localTelemetry, { recursive: true });
3488
3740
  const backend = new MultiDirectoryBackend(projectDir);
3489
3741
  const dirs = backend.getDirectories();
3490
- const localDir = dirs.find(d => d.path === localTelemetry);
3742
+ // Compare against resolved path (due to symlink protection)
3743
+ const resolvedPath = fs.realpathSync(localTelemetry);
3744
+ const localDir = dirs.find(d => d.path === resolvedPath);
3491
3745
  assert.ok(localDir);
3492
3746
  assert.strictEqual(localDir?.source, 'local');
3493
3747
  });
@@ -3496,7 +3750,9 @@ describe('MultiDirectoryBackend', () => {
3496
3750
  fs.mkdirSync(localTelemetry, { recursive: true });
3497
3751
  const backend = new MultiDirectoryBackend(projectDir);
3498
3752
  const dirs = backend.getDirectories();
3499
- const localDir = dirs.find(d => d.path === localTelemetry);
3753
+ // Compare against resolved path (due to symlink protection)
3754
+ const resolvedPath = fs.realpathSync(localTelemetry);
3755
+ const localDir = dirs.find(d => d.path === resolvedPath);
3500
3756
  assert.ok(localDir);
3501
3757
  assert.strictEqual(localDir?.source, 'local');
3502
3758
  });
@@ -3735,17 +3991,15 @@ describe('MultiDirectoryBackend', () => {
3735
3991
  describe('OTLP Export', () => {
3736
3992
  let tempDir;
3737
3993
  let backend;
3994
+ before(() => {
3995
+ tempDir = getSharedTempDir('OTLPExport');
3996
+ });
3738
3997
  beforeEach(() => {
3739
- tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'otlp-export-test-'));
3998
+ clearTempDir(tempDir);
3740
3999
  backend = new LocalJsonlBackend(tempDir);
3741
4000
  });
3742
- afterEach(() => {
3743
- try {
3744
- fs.rmSync(tempDir, { recursive: true, force: true });
3745
- }
3746
- catch {
3747
- // Ignore cleanup errors
3748
- }
4001
+ after(() => {
4002
+ removeSharedTempDir('OTLPExport');
3749
4003
  });
3750
4004
  describe('exportTracesOTLP', () => {
3751
4005
  it('should export traces in OTLP JSON format', async () => {
@@ -3970,5 +4224,428 @@ describe('OTLP Export', () => {
3970
4224
  assert.strictEqual(dataPoint.exemplars[0].asDouble, 150);
3971
4225
  });
3972
4226
  });
4227
+ describe('queryEvaluations', () => {
4228
+ it('should read and normalize evaluations from JSONL files', async () => {
4229
+ const today = getTestDate();
4230
+ const mockEvaluations = [
4231
+ {
4232
+ timestamp: '2026-01-29T10:00:00Z',
4233
+ attributes: {
4234
+ 'gen_ai.evaluation.name': 'Relevance',
4235
+ 'gen_ai.evaluation.score.value': 0.92,
4236
+ 'gen_ai.evaluation.score.label': 'relevant',
4237
+ 'gen_ai.evaluation.explanation': 'Response addresses the query',
4238
+ },
4239
+ },
4240
+ ];
4241
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4242
+ const results = await backend.queryEvaluations({});
4243
+ assert.strictEqual(results.length, 1);
4244
+ assert.strictEqual(results[0].evaluationName, 'Relevance');
4245
+ assert.strictEqual(results[0].scoreValue, 0.92);
4246
+ assert.strictEqual(results[0].scoreLabel, 'relevant');
4247
+ assert.strictEqual(results[0].explanation, 'Response addresses the query');
4248
+ });
4249
+ it('should filter evaluations by evaluationName substring', async () => {
4250
+ const today = getTestDate();
4251
+ const mockEvaluations = [
4252
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Relevance' } },
4253
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Faithfulness' } },
4254
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'RelevanceScore' } },
4255
+ ];
4256
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4257
+ const results = await backend.queryEvaluations({ evaluationName: 'Relevance' });
4258
+ assert.strictEqual(results.length, 2);
4259
+ assert.ok(results.every(e => e.evaluationName.includes('Relevance')));
4260
+ });
4261
+ it('should filter evaluations by scoreMin threshold', async () => {
4262
+ const today = getTestDate();
4263
+ const mockEvaluations = [
4264
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.3 } },
4265
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.7 } },
4266
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.9 } },
4267
+ ];
4268
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4269
+ const results = await backend.queryEvaluations({ scoreMin: 0.5 });
4270
+ assert.strictEqual(results.length, 2);
4271
+ assert.ok(results.every(e => e.scoreValue >= 0.5));
4272
+ });
4273
+ it('should filter evaluations by scoreMax threshold', async () => {
4274
+ const today = getTestDate();
4275
+ const mockEvaluations = [
4276
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.3 } },
4277
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.7 } },
4278
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.9 } },
4279
+ ];
4280
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4281
+ const results = await backend.queryEvaluations({ scoreMax: 0.5 });
4282
+ assert.strictEqual(results.length, 1);
4283
+ assert.strictEqual(results[0].scoreValue, 0.3);
4284
+ });
4285
+ it('should filter evaluations by score range', async () => {
4286
+ const today = getTestDate();
4287
+ const mockEvaluations = [
4288
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.3 } },
4289
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.5 } },
4290
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.7 } },
4291
+ { timestamp: '2026-01-29T10:03:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.9 } },
4292
+ ];
4293
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4294
+ const results = await backend.queryEvaluations({ scoreMin: 0.4, scoreMax: 0.8 });
4295
+ assert.strictEqual(results.length, 2);
4296
+ assert.ok(results.some(e => e.scoreValue === 0.5));
4297
+ assert.ok(results.some(e => e.scoreValue === 0.7));
4298
+ });
4299
+ it('should pass evaluations without scoreValue through score range filters (P1-1)', async () => {
4300
+ const today = getTestDate();
4301
+ const mockEvaluations = [
4302
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'ToolCorrectness', 'gen_ai.evaluation.score.label': 'pass' } },
4303
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Relevance', 'gen_ai.evaluation.score.value': 0.8 } },
4304
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Relevance', 'gen_ai.evaluation.score.value': 0.3 } },
4305
+ ];
4306
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4307
+ const results = await backend.queryEvaluations({ scoreMin: 0.5 });
4308
+ // Should include ToolCorrectness (no scoreValue) AND Relevance with 0.8
4309
+ assert.strictEqual(results.length, 2);
4310
+ assert.ok(results.some(e => e.evaluationName === 'ToolCorrectness'));
4311
+ assert.ok(results.some(e => e.scoreValue === 0.8));
4312
+ });
4313
+ it('should filter evaluations by scoreLabel exact match', async () => {
4314
+ const today = getTestDate();
4315
+ const mockEvaluations = [
4316
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.label': 'pass' } },
4317
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.label': 'fail' } },
4318
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.label': 'pass' } },
4319
+ ];
4320
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4321
+ const results = await backend.queryEvaluations({ scoreLabel: 'pass' });
4322
+ assert.strictEqual(results.length, 2);
4323
+ assert.ok(results.every(e => e.scoreLabel === 'pass'));
4324
+ });
4325
+ it('should filter evaluations by responseId', async () => {
4326
+ const today = getTestDate();
4327
+ const mockEvaluations = [
4328
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.response.id': 'resp-123' } },
4329
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.response.id': 'resp-456' } },
4330
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.response.id': 'resp-123' } },
4331
+ ];
4332
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4333
+ const results = await backend.queryEvaluations({ responseId: 'resp-123' });
4334
+ assert.strictEqual(results.length, 2);
4335
+ assert.ok(results.every(e => e.responseId === 'resp-123'));
4336
+ });
4337
+ it('should filter evaluations by traceId', async () => {
4338
+ const today = getTestDate();
4339
+ const mockEvaluations = [
4340
+ { timestamp: '2026-01-29T10:00:00Z', traceId: 'trace-abc', attributes: { 'gen_ai.evaluation.name': 'Test' } },
4341
+ { timestamp: '2026-01-29T10:01:00Z', traceId: 'trace-xyz', attributes: { 'gen_ai.evaluation.name': 'Test' } },
4342
+ { timestamp: '2026-01-29T10:02:00Z', traceId: 'trace-abc', attributes: { 'gen_ai.evaluation.name': 'Test' } },
4343
+ ];
4344
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4345
+ const results = await backend.queryEvaluations({ traceId: 'trace-abc' });
4346
+ assert.strictEqual(results.length, 2);
4347
+ assert.ok(results.every(e => e.traceId === 'trace-abc'));
4348
+ });
4349
+ it('should filter evaluations by sessionId', async () => {
4350
+ const today = getTestDate();
4351
+ const mockEvaluations = [
4352
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'session.id': 'sess-111' } },
4353
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'session.id': 'sess-222' } },
4354
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'session.id': 'sess-111' } },
4355
+ ];
4356
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4357
+ const results = await backend.queryEvaluations({ sessionId: 'sess-111' });
4358
+ assert.strictEqual(results.length, 2);
4359
+ assert.ok(results.every(e => e.sessionId === 'sess-111'));
4360
+ });
4361
+ it('should skip evaluations without required evaluationName', async () => {
4362
+ const today = getTestDate();
4363
+ const mockEvaluations = [
4364
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Valid' } },
4365
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.score.value': 0.5 } }, // Missing name
4366
+ { timestamp: '2026-01-29T10:02:00Z', attributes: {} }, // Empty
4367
+ ];
4368
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4369
+ const results = await backend.queryEvaluations({});
4370
+ assert.strictEqual(results.length, 1);
4371
+ assert.strictEqual(results[0].evaluationName, 'Valid');
4372
+ });
4373
+ it('should reject NaN and Infinity in scoreValue (P0-2)', async () => {
4374
+ const today = getTestDate();
4375
+ // Write raw JSONL with invalid numbers (simulating corrupted data)
4376
+ const rawContent = [
4377
+ JSON.stringify({ timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.5 } }),
4378
+ '{"timestamp":"2026-01-29T10:01:00Z","attributes":{"gen_ai.evaluation.name":"NaNTest","gen_ai.evaluation.score.value":"NaN"}}',
4379
+ JSON.stringify({ timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Valid', 'gen_ai.evaluation.score.value': 0.8 } }),
4380
+ ].join('\n');
4381
+ fs.writeFileSync(path.join(tempDir, `evaluations-${today}.jsonl`), rawContent, 'utf-8');
4382
+ const results = await backend.queryEvaluations({});
4383
+ // Should have 3 evaluations (NaNTest has scoreValue as string "NaN" so it becomes undefined, but evaluation itself is valid)
4384
+ assert.strictEqual(results.length, 3);
4385
+ // The NaNTest should have undefined scoreValue (string "NaN" is not a number type)
4386
+ const nanTest = results.find(e => e.evaluationName === 'NaNTest');
4387
+ assert.strictEqual(nanTest?.scoreValue, undefined);
4388
+ });
4389
+ it('should reject empty strings in scoreLabel (P0-2)', async () => {
4390
+ const today = getTestDate();
4391
+ const mockEvaluations = [
4392
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.label': 'pass' } },
4393
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.label': '' } },
4394
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.label': ' ' } },
4395
+ ];
4396
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4397
+ const results = await backend.queryEvaluations({});
4398
+ assert.strictEqual(results.length, 3);
4399
+ // Only first should have scoreLabel
4400
+ assert.strictEqual(results[0].scoreLabel, 'pass');
4401
+ assert.strictEqual(results[1].scoreLabel, undefined);
4402
+ assert.strictEqual(results[2].scoreLabel, undefined);
4403
+ });
4404
+ it('should cache query results', async () => {
4405
+ const today = getTestDate();
4406
+ const mockEvaluations = [
4407
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test' } },
4408
+ ];
4409
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4410
+ // First query
4411
+ const results1 = await backend.queryEvaluations({ evaluationName: 'Test' });
4412
+ const stats1 = backend.getCacheStats();
4413
+ // Second query (same params)
4414
+ const results2 = await backend.queryEvaluations({ evaluationName: 'Test' });
4415
+ const stats2 = backend.getCacheStats();
4416
+ assert.deepStrictEqual(results1, results2);
4417
+ assert.strictEqual(stats2.evaluations.hits, stats1.evaluations.hits + 1);
4418
+ });
4419
+ it('should apply limit and offset', async () => {
4420
+ const today = getTestDate();
4421
+ const mockEvaluations = Array.from({ length: 10 }, (_, i) => ({
4422
+ timestamp: `2026-01-29T10:${String(i).padStart(2, '0')}:00Z`,
4423
+ attributes: { 'gen_ai.evaluation.name': `Eval${i}` },
4424
+ }));
4425
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4426
+ const results = await backend.queryEvaluations({ limit: 3, offset: 2 });
4427
+ assert.strictEqual(results.length, 3);
4428
+ assert.strictEqual(results[0].evaluationName, 'Eval2');
4429
+ assert.strictEqual(results[1].evaluationName, 'Eval3');
4430
+ assert.strictEqual(results[2].evaluationName, 'Eval4');
4431
+ });
4432
+ it('should return empty array when no files found', async () => {
4433
+ const results = await backend.queryEvaluations({});
4434
+ assert.deepStrictEqual(results, []);
4435
+ });
4436
+ it('should use index when available', async () => {
4437
+ const today = getTestDate();
4438
+ const mockEvaluations = [
4439
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Relevance', 'gen_ai.evaluation.score.label': 'pass' } },
4440
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Faithfulness', 'gen_ai.evaluation.score.label': 'fail' } },
4441
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Relevance', 'gen_ai.evaluation.score.label': 'pass' } },
4442
+ ];
4443
+ const filePath = path.join(tempDir, `evaluations-${today}.jsonl`);
4444
+ writeJsonlFile(filePath, mockEvaluations);
4445
+ // Build index
4446
+ await buildAndWriteIndex(filePath, 'evaluations');
4447
+ // Verify index exists
4448
+ const idxPath = getIndexPath(filePath);
4449
+ assert.ok(fs.existsSync(idxPath));
4450
+ // Query should use index
4451
+ const results = await backend.queryEvaluations({ evaluationName: 'Relevance' });
4452
+ assert.strictEqual(results.length, 2);
4453
+ assert.ok(results.every(e => e.evaluationName === 'Relevance'));
4454
+ });
4455
+ it('should filter by date range', async () => {
4456
+ const today = getTestDate();
4457
+ const yesterday = new Date(Date.now() - 24 * 60 * 60 * 1000).toISOString().split('T')[0];
4458
+ // Write files for both days
4459
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), [
4460
+ { timestamp: `${today}T10:00:00Z`, attributes: { 'gen_ai.evaluation.name': 'Today' } },
4461
+ ]);
4462
+ writeJsonlFile(path.join(tempDir, `evaluations-${yesterday}.jsonl`), [
4463
+ { timestamp: `${yesterday}T10:00:00Z`, attributes: { 'gen_ai.evaluation.name': 'Yesterday' } },
4464
+ ]);
4465
+ // Query only today
4466
+ const results = await backend.queryEvaluations({ startDate: today, endDate: today });
4467
+ assert.strictEqual(results.length, 1);
4468
+ assert.strictEqual(results[0].evaluationName, 'Today');
4469
+ });
4470
+ // Phase 3: evaluator field tests
4471
+ it('should read and normalize evaluator and evaluatorType fields', async () => {
4472
+ const today = getTestDate();
4473
+ const mockEvaluations = [
4474
+ {
4475
+ timestamp: '2026-01-29T10:00:00Z',
4476
+ attributes: {
4477
+ 'gen_ai.evaluation.name': 'Relevance',
4478
+ 'gen_ai.evaluation.score.value': 0.92,
4479
+ 'gen_ai.evaluation.evaluator': 'gpt-4-as-judge',
4480
+ 'gen_ai.evaluation.evaluator.type': 'llm',
4481
+ },
4482
+ },
4483
+ ];
4484
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4485
+ const results = await backend.queryEvaluations({});
4486
+ assert.strictEqual(results.length, 1);
4487
+ assert.strictEqual(results[0].evaluator, 'gpt-4-as-judge');
4488
+ assert.strictEqual(results[0].evaluatorType, 'llm');
4489
+ });
4490
+ it('should filter evaluations by evaluator exact match', async () => {
4491
+ const today = getTestDate();
4492
+ const mockEvaluations = [
4493
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': 'gpt-4-as-judge' } },
4494
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': 'human-reviewer' } },
4495
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': 'gpt-4-as-judge' } },
4496
+ ];
4497
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4498
+ const results = await backend.queryEvaluations({ evaluator: 'gpt-4-as-judge' });
4499
+ assert.strictEqual(results.length, 2);
4500
+ assert.ok(results.every(e => e.evaluator === 'gpt-4-as-judge'));
4501
+ });
4502
+ it('should filter evaluations by evaluatorType exact match', async () => {
4503
+ const today = getTestDate();
4504
+ const mockEvaluations = [
4505
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator.type': 'llm' } },
4506
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator.type': 'human' } },
4507
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator.type': 'llm' } },
4508
+ ];
4509
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4510
+ const results = await backend.queryEvaluations({ evaluatorType: 'llm' });
4511
+ assert.strictEqual(results.length, 2);
4512
+ assert.ok(results.every(e => e.evaluatorType === 'llm'));
4513
+ });
4514
+ it('should handle all valid evaluatorType values', async () => {
4515
+ const today = getTestDate();
4516
+ const mockEvaluations = [
4517
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test1', 'gen_ai.evaluation.evaluator.type': 'llm' } },
4518
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test2', 'gen_ai.evaluation.evaluator.type': 'human' } },
4519
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test3', 'gen_ai.evaluation.evaluator.type': 'rule' } },
4520
+ { timestamp: '2026-01-29T10:03:00Z', attributes: { 'gen_ai.evaluation.name': 'Test4', 'gen_ai.evaluation.evaluator.type': 'classifier' } },
4521
+ ];
4522
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4523
+ const results = await backend.queryEvaluations({});
4524
+ assert.strictEqual(results.length, 4);
4525
+ // Results may not be in insertion order - check that all types are present
4526
+ const types = results.map(r => r.evaluatorType).sort();
4527
+ assert.deepStrictEqual(types, ['classifier', 'human', 'llm', 'rule']);
4528
+ });
4529
+ it('should reject invalid evaluatorType values', async () => {
4530
+ const today = getTestDate();
4531
+ const mockEvaluations = [
4532
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator.type': 'invalid' } },
4533
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator.type': 'LLM' } }, // Case matters
4534
+ ];
4535
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4536
+ const results = await backend.queryEvaluations({});
4537
+ assert.strictEqual(results.length, 2);
4538
+ // Invalid types should be undefined
4539
+ assert.strictEqual(results[0].evaluatorType, undefined);
4540
+ assert.strictEqual(results[1].evaluatorType, 'llm'); // Normalized to lowercase
4541
+ });
4542
+ it('should use index when filtering by evaluator', async () => {
4543
+ const today = getTestDate();
4544
+ const filePath = path.join(tempDir, `evaluations-${today}.jsonl`);
4545
+ const mockEvaluations = [
4546
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Relevance', 'gen_ai.evaluation.evaluator': 'gpt-4-as-judge' } },
4547
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Faithfulness', 'gen_ai.evaluation.evaluator': 'human-reviewer' } },
4548
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Relevance', 'gen_ai.evaluation.evaluator': 'gpt-4-as-judge' } },
4549
+ ];
4550
+ writeJsonlFile(filePath, mockEvaluations);
4551
+ // Build index
4552
+ await buildAndWriteIndex(filePath, 'evaluations');
4553
+ // Verify index exists
4554
+ const idxPath = getIndexPath(filePath);
4555
+ assert.ok(fs.existsSync(idxPath));
4556
+ // Query should use index
4557
+ const results = await backend.queryEvaluations({ evaluator: 'gpt-4-as-judge' });
4558
+ assert.strictEqual(results.length, 2);
4559
+ assert.ok(results.every(e => e.evaluator === 'gpt-4-as-judge'));
4560
+ });
4561
+ it('should handle empty evaluator string', async () => {
4562
+ const today = getTestDate();
4563
+ const mockEvaluations = [
4564
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': '' } },
4565
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': ' ' } },
4566
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': 'valid' } },
4567
+ ];
4568
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4569
+ const results = await backend.queryEvaluations({});
4570
+ assert.strictEqual(results.length, 3);
4571
+ // Empty and whitespace-only should be undefined
4572
+ assert.strictEqual(results[0].evaluator, undefined);
4573
+ assert.strictEqual(results[1].evaluator, undefined);
4574
+ assert.strictEqual(results[2].evaluator, 'valid');
4575
+ });
4576
+ it('should filter by both evaluator and evaluatorType', async () => {
4577
+ const today = getTestDate();
4578
+ const mockEvaluations = [
4579
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': 'gpt-4', 'gen_ai.evaluation.evaluator.type': 'llm' } },
4580
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': 'gpt-4', 'gen_ai.evaluation.evaluator.type': 'classifier' } },
4581
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.evaluator': 'claude', 'gen_ai.evaluation.evaluator.type': 'llm' } },
4582
+ ];
4583
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4584
+ const results = await backend.queryEvaluations({ evaluator: 'gpt-4', evaluatorType: 'llm' });
4585
+ assert.strictEqual(results.length, 1);
4586
+ assert.strictEqual(results[0].evaluator, 'gpt-4');
4587
+ assert.strictEqual(results[0].evaluatorType, 'llm');
4588
+ });
4589
+ // Phase 1: scoreUnit field tests
4590
+ it('should read and normalize scoreUnit field', async () => {
4591
+ const today = getTestDate();
4592
+ const mockEvaluations = [
4593
+ {
4594
+ timestamp: '2026-01-29T10:00:00Z',
4595
+ attributes: {
4596
+ 'gen_ai.evaluation.name': 'Relevance',
4597
+ 'gen_ai.evaluation.score.value': 85,
4598
+ 'gen_ai.evaluation.score.unit': 'percentage',
4599
+ },
4600
+ },
4601
+ ];
4602
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4603
+ const results = await backend.queryEvaluations({});
4604
+ assert.strictEqual(results.length, 1);
4605
+ assert.strictEqual(results[0].scoreValue, 85);
4606
+ assert.strictEqual(results[0].scoreUnit, 'percentage');
4607
+ });
4608
+ it('should handle various scoreUnit values', async () => {
4609
+ const today = getTestDate();
4610
+ const mockEvaluations = [
4611
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test1', 'gen_ai.evaluation.score.value': 85, 'gen_ai.evaluation.score.unit': 'percentage' } },
4612
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test2', 'gen_ai.evaluation.score.value': 0.85, 'gen_ai.evaluation.score.unit': 'ratio_0_1' } },
4613
+ { timestamp: '2026-01-29T10:02:00Z', attributes: { 'gen_ai.evaluation.name': 'Test3', 'gen_ai.evaluation.score.value': 4, 'gen_ai.evaluation.score.unit': 'stars_1_5' } },
4614
+ ];
4615
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4616
+ const results = await backend.queryEvaluations({});
4617
+ assert.strictEqual(results.length, 3);
4618
+ // Results may not be in insertion order
4619
+ const percentage = results.find(r => r.evaluationName === 'Test1');
4620
+ const ratio = results.find(r => r.evaluationName === 'Test2');
4621
+ const stars = results.find(r => r.evaluationName === 'Test3');
4622
+ assert.strictEqual(percentage?.scoreUnit, 'percentage');
4623
+ assert.strictEqual(ratio?.scoreUnit, 'ratio_0_1');
4624
+ assert.strictEqual(stars?.scoreUnit, 'stars_1_5');
4625
+ });
4626
+ it('should handle missing scoreUnit', async () => {
4627
+ const today = getTestDate();
4628
+ const mockEvaluations = [
4629
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.85 } },
4630
+ ];
4631
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4632
+ const results = await backend.queryEvaluations({});
4633
+ assert.strictEqual(results.length, 1);
4634
+ assert.strictEqual(results[0].scoreValue, 0.85);
4635
+ assert.strictEqual(results[0].scoreUnit, undefined);
4636
+ });
4637
+ it('should handle empty scoreUnit string', async () => {
4638
+ const today = getTestDate();
4639
+ const mockEvaluations = [
4640
+ { timestamp: '2026-01-29T10:00:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.85, 'gen_ai.evaluation.score.unit': '' } },
4641
+ { timestamp: '2026-01-29T10:01:00Z', attributes: { 'gen_ai.evaluation.name': 'Test', 'gen_ai.evaluation.score.value': 0.90, 'gen_ai.evaluation.score.unit': ' ' } },
4642
+ ];
4643
+ writeJsonlFile(path.join(tempDir, `evaluations-${today}.jsonl`), mockEvaluations);
4644
+ const results = await backend.queryEvaluations({});
4645
+ assert.strictEqual(results.length, 2);
4646
+ // Empty and whitespace-only should be undefined
4647
+ assert.ok(results.every(r => r.scoreUnit === undefined));
4648
+ });
4649
+ });
3973
4650
  });
3974
4651
  //# sourceMappingURL=local-jsonl.test.js.map