rubyllm-observ 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +54 -6
  3. data/app/assets/stylesheets/observ/_annotations.scss +114 -103
  4. data/app/assets/stylesheets/observ/_card.scss +58 -49
  5. data/app/assets/stylesheets/observ/_chat.scss +247 -155
  6. data/app/assets/stylesheets/observ/_components.scss +622 -340
  7. data/app/assets/stylesheets/observ/_dashboard.scss +31 -28
  8. data/app/assets/stylesheets/observ/_datasets.scss +494 -547
  9. data/app/assets/stylesheets/observ/_drawer.scss +250 -228
  10. data/app/assets/stylesheets/observ/_filters.scss +139 -0
  11. data/app/assets/stylesheets/observ/_json_viewer.scss +103 -97
  12. data/app/assets/stylesheets/observ/_layout.scss +443 -178
  13. data/app/assets/stylesheets/observ/_metrics.scss +79 -76
  14. data/app/assets/stylesheets/observ/_namespace.scss +18 -0
  15. data/app/assets/stylesheets/observ/_observations.scss +122 -119
  16. data/app/assets/stylesheets/observ/_pagination.scss +129 -112
  17. data/app/assets/stylesheets/observ/_prompts.scss +485 -269
  18. data/app/assets/stylesheets/observ/_reset.scss +249 -0
  19. data/app/assets/stylesheets/observ/_table.scss +46 -38
  20. data/app/assets/stylesheets/observ/_variables.scss +54 -0
  21. data/app/assets/stylesheets/observ/application.scss +3 -0
  22. data/app/controllers/observ/dataset_run_items_controller.rb +0 -1
  23. data/app/controllers/observ/review_queue_controller.rb +154 -0
  24. data/app/controllers/observ/scores_controller.rb +64 -0
  25. data/app/controllers/observ/sessions_controller.rb +23 -0
  26. data/app/helpers/observ/application_helper.rb +1 -0
  27. data/app/helpers/observ/reviews_helper.rb +33 -0
  28. data/app/models/concerns/observ/json_queryable.rb +138 -0
  29. data/app/models/concerns/observ/reviewable.rb +41 -0
  30. data/app/models/concerns/observ/scoreable.rb +34 -0
  31. data/app/models/observ/dataset_run_item.rb +3 -13
  32. data/app/models/observ/review_item.rb +48 -0
  33. data/app/models/observ/score.rb +38 -6
  34. data/app/models/observ/session.rb +5 -1
  35. data/app/models/observ/trace.rb +3 -0
  36. data/app/services/observ/evaluators/base_evaluator.rb +0 -1
  37. data/app/services/observ/guardrail_service.rb +128 -0
  38. data/app/views/kaminari/_first_page.html.erb +1 -1
  39. data/app/views/kaminari/_gap.html.erb +1 -1
  40. data/app/views/kaminari/_last_page.html.erb +1 -1
  41. data/app/views/kaminari/_next_page.html.erb +1 -1
  42. data/app/views/kaminari/_page.html.erb +1 -1
  43. data/app/views/kaminari/_paginator.html.erb +1 -1
  44. data/app/views/kaminari/_prev_page.html.erb +1 -1
  45. data/app/views/kaminari/observ/_first_page.html.erb +1 -1
  46. data/app/views/kaminari/observ/_gap.html.erb +1 -1
  47. data/app/views/kaminari/observ/_last_page.html.erb +1 -1
  48. data/app/views/kaminari/observ/_next_page.html.erb +1 -1
  49. data/app/views/kaminari/observ/_page.html.erb +1 -1
  50. data/app/views/kaminari/observ/_paginator.html.erb +1 -1
  51. data/app/views/kaminari/observ/_prev_page.html.erb +1 -1
  52. data/app/views/layouts/observ/application.html.erb +96 -58
  53. data/app/views/observ/annotations/_form.html.erb +5 -5
  54. data/app/views/observ/annotations/index.html.erb +4 -4
  55. data/app/views/observ/annotations/sessions_index.html.erb +9 -9
  56. data/app/views/observ/annotations/traces_index.html.erb +9 -9
  57. data/app/views/observ/chats/_form.html.erb +7 -7
  58. data/app/views/observ/datasets/index.html.erb +6 -6
  59. data/app/views/observ/messages/_form.html.erb +11 -12
  60. data/app/views/observ/observations/index.html.erb +3 -4
  61. data/app/views/observ/prompts/_form.html.erb +37 -38
  62. data/app/views/observ/prompts/_new_form.html.erb +37 -38
  63. data/app/views/observ/prompts/compare.html.erb +59 -55
  64. data/app/views/observ/prompts/edit.html.erb +3 -3
  65. data/app/views/observ/prompts/index.html.erb +9 -9
  66. data/app/views/observ/prompts/new.html.erb +3 -3
  67. data/app/views/observ/prompts/show.html.erb +2 -2
  68. data/app/views/observ/prompts/versions.html.erb +22 -22
  69. data/app/views/observ/review_queue/_item.html.erb +39 -0
  70. data/app/views/observ/review_queue/_stats.html.erb +18 -0
  71. data/app/views/observ/review_queue/index.html.erb +49 -0
  72. data/app/views/observ/review_queue/show.html.erb +76 -0
  73. data/app/views/observ/review_queue/stats.html.erb +100 -0
  74. data/app/views/observ/scores/_form.html.erb +39 -0
  75. data/app/views/observ/scores/create.turbo_stream.erb +10 -0
  76. data/app/views/observ/sessions/_chat.html.erb +59 -0
  77. data/app/views/observ/sessions/_metadata.html.erb +17 -0
  78. data/app/views/observ/sessions/_metrics.html.erb +81 -0
  79. data/app/views/observ/sessions/_traces.html.erb +92 -0
  80. data/app/views/observ/sessions/annotations_drawer.turbo_stream.erb +8 -1
  81. data/app/views/observ/sessions/index.html.erb +60 -4
  82. data/app/views/observ/sessions/show.html.erb +4 -217
  83. data/app/views/observ/traces/_details.html.erb +47 -0
  84. data/app/views/observ/traces/_input.html.erb +10 -0
  85. data/app/views/observ/traces/_metadata.html.erb +10 -0
  86. data/app/views/observ/traces/_observations.html.erb +172 -0
  87. data/app/views/observ/traces/_output.html.erb +10 -0
  88. data/app/views/observ/traces/annotations_drawer.turbo_stream.erb +8 -1
  89. data/app/views/observ/traces/index.html.erb +3 -4
  90. data/app/views/observ/traces/show.html.erb +5 -232
  91. data/config/routes.rb +14 -0
  92. data/db/migrate/015_refactor_scores_to_polymorphic.rb +27 -0
  93. data/db/migrate/016_create_observ_review_items.rb +25 -0
  94. data/lib/observ/version.rb +1 -1
  95. data/lib/rubyllm-observ.rb +1 -0
  96. metadata +31 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5e4057641cc19483f6ff46f6cfd5885c1ae25980e405626689a532eda31845e3
4
- data.tar.gz: 273fad2663c582e530c43920b17b6e67c9f364152eb04a79a3277e9f4b6e8f19
3
+ metadata.gz: '0585d05a3b7dcf8789fafd4e6b2fb9a7a938b80427caf2051f1de0b4b312e6e2'
4
+ data.tar.gz: f11909cce646368528328c3db82a469983c02e1b157c100cdb633ec54ffb97ce
5
5
  SHA512:
6
- metadata.gz: a7c42172c86d459528032ee75e8a8ac4825444030d4935b567390e6baedb953b797591f26a4164f203f4ead5572b546f0d5b2809c07f2d2f9fca5ab491784e56
7
- data.tar.gz: 3f8bea39bdb680bad629bf06c2417520ebd249a6aafc479c174ed76ec15aabbc44c854a5b5da507644dc907bdcc8893ce8704ea16742006108e94c0605eb8577
6
+ metadata.gz: ac12fb93b47733c0e63adcf4959a050758a9131e71dfa54c1043e84e344c2ba392d9f6f4dd28a1eebff690eb1c51b3ff6707c5f303b43c7618e7477ebbba63eb
7
+ data.tar.gz: c2d9d34e3f2e72e2382d3ca9f29f28e871a0897f3149e35b6531b43de21b46756b60dcd0d1a831660528c856613f2bbbe16aa5e0891b602167a1ebb2b14c7050
data/README.md CHANGED
@@ -30,7 +30,7 @@ For LLM observability without the chat UI:
30
30
  **1. Add to Gemfile:**
31
31
 
32
32
  ```ruby
33
- gem "observ"
33
+ gem "rubyllm-observ"
34
34
  ```
35
35
 
36
36
  **2. Install:**
@@ -61,7 +61,7 @@ For full observability + interactive agent testing UI:
61
61
  **1. Add to Gemfile:**
62
62
 
63
63
  ```ruby
64
- gem "observ"
64
+ gem "rubyllm-observ"
65
65
  gem "ruby_llm" # Required for chat feature
66
66
  ```
67
67
 
@@ -89,7 +89,7 @@ rails db:migrate
89
89
  - Observ enhancements on RubyLLM infrastructure
90
90
  - Example agents and tools
91
91
 
92
- See **[Chat Installation Guide](docs/CHAT_INSTALLATION.md)** for detailed setup.
92
+ See **Creating Agents and Services** in `docs/creating-agents-and-services.md` for detailed setup.
93
93
 
94
94
  ---
95
95
 
@@ -345,7 +345,7 @@ end
345
345
  - All chat interactions appear in `/observ/sessions`
346
346
  - Full observability of tokens, costs, and tool calls
347
347
 
348
- See **[Chat Installation Guide](docs/CHAT_INSTALLATION.md)** for complete documentation.
348
+ See `docs/creating-agents-and-services.md` for complete documentation on creating agents.
349
349
 
350
350
  ### Phase Tracking (Optional Chat Feature)
351
351
 
@@ -528,6 +528,54 @@ session.annotations.create(
528
528
  # Visit /observ/annotations/export in browser
529
529
  ```
530
530
 
531
+ ### Datasets & Evaluators
532
+
533
+ Observ includes a dataset and evaluator system for testing LLM outputs against predefined inputs and scoring results.
534
+
535
+ **Creating a dataset:**
536
+
537
+ ```ruby
538
+ # Create a dataset
539
+ dataset = Observ::Dataset.create!(
540
+ name: "Article Recommendations Test Set",
541
+ description: "Test cases for article recommendation system"
542
+ )
543
+
544
+ # Add test items
545
+ dataset.items.create!(
546
+ input: { user_query: "Recommend articles for someone feeling anxious" },
547
+ expected_output: { recommended_articles: ["art_001", "art_003"] }
548
+ )
549
+ ```
550
+
551
+ **Running evaluations:**
552
+
553
+ ```ruby
554
+ # Create a dataset run
555
+ run = dataset.runs.create!(
556
+ name: "GPT-4 baseline",
557
+ model_name: "gpt-4",
558
+ status: :pending
559
+ )
560
+
561
+ # Run items are created when executing your LLM against the dataset
562
+ # Each run item links a dataset item to a trace
563
+
564
+ # Score outputs with built-in evaluators
565
+ Observ::Evaluators::ExactMatchEvaluator.new.evaluate(run_item)
566
+ Observ::Evaluators::ContainsEvaluator.new(keywords: ["anxiety"]).evaluate(run_item)
567
+ ```
568
+
569
+ **Built-in evaluators:**
570
+ - `ExactMatchEvaluator` - Exact string match against expected output
571
+ - `ContainsEvaluator` - Check if output contains specific keywords
572
+ - `JsonStructureEvaluator` - Validate JSON structure
573
+ - `LlmJudgeEvaluator` - Use an LLM to score output quality
574
+
575
+ Visit `/observ/datasets` to manage datasets and view run results in the UI.
576
+
577
+ See `docs/dataset_and_evaluator_feature.md` for complete documentation.
578
+
531
579
  ## Asset Management
532
580
 
533
581
  Observ provides several tools for managing assets in your Rails application:
@@ -622,7 +670,7 @@ Observ uses:
622
670
  ### Chat Feature (Optional Add-on)
623
671
  - **RubyLLM**: Required for chat/agent testing feature
624
672
  - Installed with `rails generate observ:install:chat`
625
- - See [Chat Installation Guide](docs/CHAT_INSTALLATION.md)
673
+ - See `docs/creating-agents-and-services.md` for agent documentation
626
674
 
627
675
  ## Testing
628
676
 
@@ -775,4 +823,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
775
823
 
776
824
  ## Version
777
825
 
778
- Current version: 0.1.0
826
+ Current version: 0.6.0
data/app/assets/stylesheets/observ/_annotations.scss CHANGED
@@ -1,127 +1,138 @@
1
- .observ-drawer__section {
2
- padding: 1.5rem;
3
- border-bottom: 1px solid var(--observ-border-color, #e5e7eb);
1
+ @import 'variables';
2
+ @import 'namespace';
4
3
 
5
- &:last-child {
6
- border-bottom: none;
7
- }
8
- }
9
-
10
- .observ-drawer__section-title {
11
- font-size: 1rem;
12
- font-weight: 600;
13
- color: var(--observ-text-primary, #111827);
14
- margin: 0 0 1rem 0;
15
- }
4
+ @include observ-scoped {
5
+ .observ-drawer__section {
6
+ padding: $observ-spacing-lg;
7
+ border-bottom: 1px solid $observ-border-color;
16
8
 
17
- .observ-annotation-form {
18
- .observ-form-field {
19
- margin-bottom: 1rem;
9
+ &:last-child {
10
+ border-bottom: none;
11
+ }
20
12
  }
21
13
 
22
- .observ-form-label {
23
- display: block;
24
- font-size: 0.875rem;
25
- font-weight: 500;
26
- color: var(--observ-text-primary, #111827);
27
- margin-bottom: 0.5rem;
14
+ .observ-drawer__section-title {
15
+ font-size: $observ-font-size-base;
16
+ font-weight: 600;
17
+ color: $observ-text-primary;
18
+ margin: 0 0 $observ-spacing-md 0;
28
19
  }
29
20
 
30
- .observ-form-textarea {
31
- width: 100%;
32
- padding: 0.75rem;
33
- border: 1px solid var(--observ-border-color, #d1d5db);
34
- border-radius: 0.375rem;
35
- font-size: 0.875rem;
36
- font-family: inherit;
37
- resize: vertical;
38
- transition: border-color 0.15s ease-in-out, box-shadow 0.15s ease-in-out;
39
-
40
- &:focus {
41
- outline: none;
42
- border-color: var(--observ-primary-color, #3b82f6);
43
- box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);
21
+ .observ-annotation-form {
22
+ .observ-form-field {
23
+ margin-bottom: $observ-spacing-md;
24
+ }
25
+
26
+ .observ-form-label {
27
+ display: block;
28
+ font-size: $observ-font-size-sm;
29
+ font-weight: 500;
30
+ color: $observ-text-primary;
31
+ margin-bottom: $observ-spacing-xs;
32
+ }
33
+
34
+ .observ-form-textarea {
35
+ width: 100%;
36
+ padding: $observ-spacing-sm $observ-spacing-md;
37
+ border: 1px solid $observ-border-color;
38
+ border-radius: $observ-border-radius;
39
+ font-size: $observ-font-size-sm;
40
+ font-family: inherit;
41
+ background-color: $observ-bg-elevated;
42
+ color: $observ-text-primary;
43
+ resize: vertical;
44
+ transition: border-color 0.15s ease-in-out, box-shadow 0.15s ease-in-out;
45
+
46
+ &::placeholder {
47
+ color: $observ-text-muted;
48
+ }
49
+
50
+ &:focus {
51
+ outline: none;
52
+ border-color: $observ-primary;
53
+ box-shadow: 0 0 0 3px rgba($observ-primary, 0.1);
54
+ }
55
+ }
56
+
57
+ .observ-form-error {
58
+ display: block;
59
+ font-size: $observ-font-size-xs;
60
+ color: $observ-danger;
61
+ margin-top: $observ-spacing-xs;
44
62
  }
45
- }
46
63
 
47
- .observ-form-error {
48
- display: block;
49
- font-size: 0.75rem;
50
- color: var(--observ-error-color, #ef4444);
51
- margin-top: 0.25rem;
64
+ .observ-form-actions {
65
+ display: flex;
66
+ justify-content: flex-end;
67
+ }
52
68
  }
53
69
 
54
- .observ-form-actions {
70
+ .observ-annotations-list {
55
71
  display: flex;
56
- justify-content: flex-end;
72
+ flex-direction: column;
73
+ gap: $observ-spacing-md;
57
74
  }
58
- }
59
-
60
- .observ-annotations-list {
61
- display: flex;
62
- flex-direction: column;
63
- gap: 1rem;
64
- }
65
75
 
66
- .observ-annotation {
67
- padding: 1rem;
68
- background: var(--observ-surface-color, #f9fafb);
69
- border: 1px solid var(--observ-border-color, #e5e7eb);
70
- border-radius: 0.5rem;
71
- }
76
+ .observ-annotation {
77
+ padding: $observ-spacing-md;
78
+ background: $observ-bg-elevated;
79
+ border: 1px solid $observ-border-color;
80
+ border-radius: $observ-border-radius;
81
+ }
72
82
 
73
- .observ-annotation__header {
74
- display: flex;
75
- justify-content: space-between;
76
- align-items: center;
77
- margin-bottom: 0.75rem;
78
- padding-bottom: 0.5rem;
79
- border-bottom: 1px solid var(--observ-border-color, #e5e7eb);
80
- }
83
+ .observ-annotation__header {
84
+ display: flex;
85
+ justify-content: space-between;
86
+ align-items: center;
87
+ margin-bottom: $observ-spacing-sm;
88
+ padding-bottom: $observ-spacing-xs;
89
+ border-bottom: 1px solid $observ-border-subtle;
90
+ }
81
91
 
82
- .observ-annotation__time {
83
- font-size: 0.75rem;
84
- color: var(--observ-text-secondary, #6b7280);
85
- }
92
+ .observ-annotation__time {
93
+ font-size: $observ-font-size-xs;
94
+ color: $observ-text-muted;
95
+ }
86
96
 
87
- .observ-annotation__delete {
88
- background: none;
89
- border: none;
90
- color: var(--observ-error-color, #ef4444);
91
- font-size: 0.75rem;
92
- cursor: pointer;
93
- padding: 0.25rem 0.5rem;
94
- border-radius: 0.25rem;
95
- transition: background-color 0.15s ease-in-out;
96
-
97
- &:hover {
98
- background-color: rgba(239, 68, 68, 0.1);
97
+ .observ-annotation__delete {
98
+ background: none;
99
+ border: none;
100
+ color: $observ-danger;
101
+ font-size: $observ-font-size-xs;
102
+ cursor: pointer;
103
+ padding: $observ-spacing-xs $observ-spacing-sm;
104
+ border-radius: $observ-border-radius-sm;
105
+ transition: background-color 0.15s ease-in-out;
106
+
107
+ &:hover {
108
+ background-color: rgba($observ-danger, 0.1);
109
+ }
99
110
  }
100
- }
101
111
 
102
- .observ-annotation__content {
103
- font-size: 0.875rem;
104
- color: var(--observ-text-primary, #111827);
105
- line-height: 1.5;
106
- white-space: pre-wrap;
107
- word-break: break-word;
112
+ .observ-annotation__content {
113
+ font-size: $observ-font-size-sm;
114
+ color: $observ-text-primary;
115
+ line-height: 1.5;
116
+ white-space: pre-wrap;
117
+ word-break: break-word;
108
118
 
109
- p {
110
- margin: 0 0 0.5rem 0;
111
-
112
- &:last-child {
113
- margin-bottom: 0;
119
+ p {
120
+ margin: 0 0 $observ-spacing-xs 0;
121
+
122
+ &:last-child {
123
+ margin-bottom: 0;
124
+ }
114
125
  }
115
126
  }
116
- }
117
127
 
118
- .observ-empty-state {
119
- padding: 2rem;
120
- text-align: center;
121
- }
128
+ .observ-empty-state {
129
+ padding: $observ-spacing-xl;
130
+ text-align: center;
131
+ }
122
132
 
123
- .observ-empty-state__text {
124
- font-size: 0.875rem;
125
- color: var(--observ-text-secondary, #6b7280);
126
- margin: 0;
133
+ .observ-empty-state__text {
134
+ font-size: $observ-font-size-sm;
135
+ color: $observ-text-muted;
136
+ margin: 0;
137
+ }
127
138
  }
data/app/assets/stylesheets/observ/_card.scss CHANGED
@@ -1,52 +1,61 @@
1
1
  @import 'variables';
2
-
3
- .observ-card {
4
- background-color: $observ-white;
5
- border-radius: $observ-border-radius;
6
- box-shadow: $observ-shadow;
7
- overflow: hidden;
8
-
9
- &__header {
10
- padding: $observ-spacing-lg;
11
- border-bottom: 1px solid $observ-gray-200;
12
- display: flex;
13
- align-items: center;
14
- justify-content: space-between;
15
- }
16
-
17
- &__title {
18
- font-size: $observ-font-size-xl;
19
- font-weight: 600;
20
- margin: 0;
21
- color: $observ-gray-900;
22
- }
23
-
24
- &__actions {
25
- display: flex;
26
- gap: $observ-spacing-sm;
27
- }
28
-
29
- &__body {
30
- padding: $observ-spacing-lg;
31
- }
32
-
33
- &__footer {
34
- padding: $observ-spacing-lg;
35
- border-top: 1px solid $observ-gray-200;
36
- background-color: $observ-gray-50;
37
- }
38
-
39
- &__empty {
40
- text-align: center;
41
- padding: $observ-spacing-2xl;
42
- color: $observ-gray-500;
43
- }
44
-
45
- &--span-2 {
46
- grid-column: span 2;
47
- }
48
-
49
- &--highlighted {
50
- border: 2px solid $observ-primary;
2
+ @import 'namespace';
3
+
4
+ @include observ-scoped {
5
+ .observ-card {
6
+ background-color: $observ-bg-surface;
7
+ border: 1px solid $observ-border-color;
8
+ border-radius: $observ-border-radius;
9
+ overflow: hidden;
10
+ transition: all 0.2s ease-in-out;
11
+
12
+ &:hover {
13
+ border-color: $observ-border-strong;
14
+ }
15
+
16
+ &__header {
17
+ padding: $observ-spacing-lg;
18
+ border-bottom: 1px solid $observ-border-subtle;
19
+ display: flex;
20
+ align-items: center;
21
+ justify-content: space-between;
22
+ }
23
+
24
+ &__title {
25
+ font-size: $observ-font-size-xl;
26
+ font-weight: 600;
27
+ margin: 0;
28
+ color: $observ-text-primary;
29
+ letter-spacing: -0.01em;
30
+ }
31
+
32
+ &__actions {
33
+ display: flex;
34
+ gap: $observ-spacing-sm;
35
+ }
36
+
37
+ &__body {
38
+ padding: $observ-spacing-lg;
39
+ }
40
+
41
+ &__footer {
42
+ padding: $observ-spacing-lg;
43
+ border-top: 1px solid $observ-border-subtle;
44
+ background-color: rgba($observ-black, 0.1);
45
+ }
46
+
47
+ &__empty {
48
+ text-align: center;
49
+ padding: $observ-spacing-2xl;
50
+ color: $observ-text-muted;
51
+ }
52
+
53
+ &--span-2 {
54
+ grid-column: span 2;
55
+ }
56
+
57
+ &--highlighted {
58
+ border: 2px solid $observ-primary;
59
+ }
51
60
  }
52
61
  }