diogenes 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.mise/config.toml +72 -0
- data/.mise/mise.lock +179 -0
- data/.mise/tasks/update-hk-import +79 -0
- data/.release-please-config.json +1 -1
- data/.release-please-manifest.json +2 -2
- data/CHANGELOG.md +7 -0
- data/CLAUDE.md +107 -99
- data/CONTRIBUTING.md +206 -0
- data/README.md +157 -134
- data/Rakefile +15 -1
- data/Steepfile +11 -0
- data/docs/gates.md +178 -0
- data/docs/targets.md +11 -0
- data/exe/diogenes +6 -0
- data/hk.pkl +46 -0
- data/lib/diogenes/cli/init.rb +88 -0
- data/lib/diogenes/cli.rb +95 -0
- data/lib/diogenes/templates/init/artifacts/decision_record.md.erb +53 -0
- data/lib/diogenes/templates/init/diogenes.rb +13 -0
- data/lib/diogenes/templates/init/hooks/README.md +15 -0
- data/lib/diogenes/templates/init/rules/five_gates.rb +33 -0
- data/lib/diogenes/templates/init/skills/example_skill.rb +33 -0
- data/lib/diogenes/version.rb +2 -1
- data/lib/diogenes.rb +27 -2
- data/sig/generated/diogenes/cli/init.rbs +34 -0
- data/sig/generated/diogenes/cli.rbs +34 -0
- data/sig/generated/diogenes/version.rbs +5 -0
- data/sig/generated/diogenes.rbs +26 -0
- metadata +23 -9
- data/docs/context.md +0 -60
- data/docs/contributing.md +0 -228
- data/docs/dashboard.md +0 -365
- data/docs/examples.md +0 -162
- data/docs/framework.md +0 -146
- data/mise.lock +0 -48
- data/mise.toml +0 -6
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Generated from lib/diogenes/cli.rb with RBS::Inline
|
|
2
|
+
|
|
3
|
+
module Diogenes
|
|
4
|
+
class Cli
|
|
5
|
+
COMMANDS: Hash[String, Class]
|
|
6
|
+
|
|
7
|
+
# : (?Array[String] argv, ?out: IO, ?err: IO) -> Integer
|
|
8
|
+
def self.run: (?Array[String] argv, ?out: IO, ?err: IO) -> Integer
|
|
9
|
+
|
|
10
|
+
# : (argv: Array[String], out: IO, err: IO) -> void
|
|
11
|
+
def initialize: (argv: Array[String], out: IO, err: IO) -> void
|
|
12
|
+
|
|
13
|
+
# : () -> Integer
|
|
14
|
+
def run: () -> Integer
|
|
15
|
+
|
|
16
|
+
private
|
|
17
|
+
|
|
18
|
+
# Run a command
|
|
19
|
+
# --
|
|
20
|
+
# : (Class, Array[String]) -> Integer
|
|
21
|
+
def run_command: (Class, Array[String]) -> Integer
|
|
22
|
+
|
|
23
|
+
# : () -> Integer
|
|
24
|
+
def print_help: () -> Integer
|
|
25
|
+
|
|
26
|
+
# : () -> Integer
|
|
27
|
+
def print_version: () -> Integer
|
|
28
|
+
|
|
29
|
+
# Print the unknown command message
|
|
30
|
+
# --
|
|
31
|
+
# : (String) -> String
|
|
32
|
+
def unknown_command_message: (String) -> String
|
|
33
|
+
end
|
|
34
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Generated from lib/diogenes.rb with RBS::Inline
|
|
2
|
+
|
|
3
|
+
module Diogenes
|
|
4
|
+
# Base error class for Diogenes.
|
|
5
|
+
class Error < StandardError
|
|
6
|
+
end
|
|
7
|
+
|
|
8
|
+
# Raised when invalid arguments are passed to a method.
|
|
9
|
+
class ArgumentError < ::ArgumentError
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
# Raised when the configuration is invalid.
|
|
13
|
+
class ValidationError < Error
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
# Raised when the user provides invalid arguments to the CLI.
|
|
17
|
+
class UserError < Error
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Returns the version of the Diogenes gem.
|
|
21
|
+
#
|
|
22
|
+
# This is managed by the release-please workflow.
|
|
23
|
+
# --
|
|
24
|
+
# : () -> String
|
|
25
|
+
def self.version: () -> String
|
|
26
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: diogenes
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.2
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Meagan Waller
|
|
@@ -30,29 +30,43 @@ dependencies:
|
|
|
30
30
|
- !ruby/object:Gem::Version
|
|
31
31
|
version: 2.6.0
|
|
32
32
|
description: A Ruby gem that holds your AI features to the light
|
|
33
|
-
executables: []
|
|
33
|
+
executables:
|
|
34
|
+
- diogenes
|
|
34
35
|
extensions: []
|
|
35
36
|
extra_rdoc_files: []
|
|
36
37
|
files:
|
|
38
|
+
- ".mise/config.toml"
|
|
39
|
+
- ".mise/mise.lock"
|
|
40
|
+
- ".mise/tasks/update-hk-import"
|
|
37
41
|
- ".release-please-config.json"
|
|
38
42
|
- ".release-please-manifest.json"
|
|
39
43
|
- ".standard.yml"
|
|
40
44
|
- CHANGELOG.md
|
|
41
45
|
- CLAUDE.md
|
|
42
46
|
- CODE_OF_CONDUCT.md
|
|
47
|
+
- CONTRIBUTING.md
|
|
43
48
|
- LICENSE.txt
|
|
44
49
|
- README.md
|
|
45
50
|
- Rakefile
|
|
46
|
-
- docs/context.md
|
|
47
|
-
- docs/contributing.md
|
|
48
|
-
- docs/dashboard.md
|
|
49
|
-
- docs/examples.md
|
|
50
|
-
- docs/framework.md
|
|
51
|
+
- Steepfile
|
|
52
|
+
- docs/gates.md
|
|
53
|
+
- docs/targets.md
|
|
54
|
+
- exe/diogenes
|
|
55
|
+
- hk.pkl
|
|
51
56
|
- lib/diogenes.rb
|
|
57
|
+
- lib/diogenes/cli.rb
|
|
58
|
+
- lib/diogenes/cli/init.rb
|
|
59
|
+
- lib/diogenes/templates/init/artifacts/decision_record.md.erb
|
|
60
|
+
- lib/diogenes/templates/init/diogenes.rb
|
|
61
|
+
- lib/diogenes/templates/init/hooks/README.md
|
|
62
|
+
- lib/diogenes/templates/init/rules/five_gates.rb
|
|
63
|
+
- lib/diogenes/templates/init/skills/example_skill.rb
|
|
52
64
|
- lib/diogenes/version.rb
|
|
53
|
-
- mise.lock
|
|
54
|
-
- mise.toml
|
|
55
65
|
- sig/diogenes.rbs
|
|
66
|
+
- sig/generated/diogenes.rbs
|
|
67
|
+
- sig/generated/diogenes/cli.rbs
|
|
68
|
+
- sig/generated/diogenes/cli/init.rbs
|
|
69
|
+
- sig/generated/diogenes/version.rbs
|
|
56
70
|
homepage: https://github.com/meaganewaller/diogenes/tree/main/diogenes
|
|
57
71
|
licenses:
|
|
58
72
|
- MIT
|
data/docs/context.md
DELETED
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
# Supplementary Context for Contributors
|
|
2
|
-
|
|
3
|
-
This file extends CLAUDE.md with additional context for common contribution scenarios. Read CLAUDE.md first.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## Adding a New Gate
|
|
8
|
-
|
|
9
|
-
Gates are the core of Diogenes. Adding one requires:
|
|
10
|
-
|
|
11
|
-
1. A class in `lib/diogenes/gates/` inheriting `Diogenes::Gates::Base`
|
|
12
|
-
2. Implementation of `#valid?` and `#failure_message`
|
|
13
|
-
3. Registration in `Diogenes::Feature`
|
|
14
|
-
4. Documentation in `docs/framework.md`
|
|
15
|
-
5. An entry in the gate compatibility matrix (see below)
|
|
16
|
-
|
|
17
|
-
Gates should be conservative by default. If configuration is ambiguous, fail loudly rather than pass silently.
|
|
18
|
-
|
|
19
|
-
### Gate Compatibility Matrix
|
|
20
|
-
|
|
21
|
-
Some gate configurations are incompatible with each other. The matrix lives in `lib/diogenes/gates/compatibility.rb` and is checked after all individual gates are validated.
|
|
22
|
-
|
|
23
|
-
Current incompatibilities:
|
|
24
|
-
- `:failure_mode severity: :financial_dispute` + `:human_in_loop verified: false` → always raises
|
|
25
|
-
- `:failure_mode severity: :safety_risk` + `:user_calibration audience: :general_consumer` → always raises
|
|
26
|
-
- `:user_calibration audience: :general_consumer` + `:human_in_loop verified: false` → raises unless `:failure_mode severity: :cosmetic`
|
|
27
|
-
|
|
28
|
-
When adding a new gate, consider whether it creates new incompatibilities and add them to the matrix.
|
|
29
|
-
|
|
30
|
-
---
|
|
31
|
-
|
|
32
|
-
## Working on the Audit Log
|
|
33
|
-
|
|
34
|
-
The audit log has two layers:
|
|
35
|
-
|
|
36
|
-
**Core layer** (`lib/diogenes/audit/`) — plain Ruby, no ActiveRecord. Produces `Diogenes::Audit::Record` structs. This must remain framework-agnostic.
|
|
37
|
-
|
|
38
|
-
**Rails layer** (`lib/diogenes/engine.rb`) — ActiveRecord-backed persistence, only loaded when Rails is detected. Writes records from the core layer to the database.
|
|
39
|
-
|
|
40
|
-
If you're adding fields to audit records, add them to the struct first, then to the Rails migration. Never add Rails-specific code to the core layer.
|
|
41
|
-
|
|
42
|
-
---
|
|
43
|
-
|
|
44
|
-
## Working on the Review Engine
|
|
45
|
-
|
|
46
|
-
The review engine is a Rails engine mounted at a configurable path. It has no opinions about authentication — that's the host app's job. Document this clearly in any UI-related PRs.
|
|
47
|
-
|
|
48
|
-
The queue logic in `lib/diogenes/review/queue.rb` is tested without Rails. The engine views and controllers are tested with a minimal Rails app in `spec/dummy/`.
|
|
49
|
-
|
|
50
|
-
---
|
|
51
|
-
|
|
52
|
-
## Common Mistakes
|
|
53
|
-
|
|
54
|
-
**Making gates runtime checks.** Gates are boot-time. If you find yourself writing code that evaluates a gate during a request, reconsider the design.
|
|
55
|
-
|
|
56
|
-
**Adding LLM-specific code.** Diogenes doesn't know what LLM you're using. If a PR requires a specific client gem, it belongs in the user's codebase.
|
|
57
|
-
|
|
58
|
-
**Soft failures.** Diogenes raises when gates fail. It does not return nil, log a warning, or degrade gracefully. A feature that fails a gate should not serve AI output under any circumstances.
|
|
59
|
-
|
|
60
|
-
**Storing content in audit records.** Audit records store metadata and hashes, not raw content. Raw content may contain PII. The host app is responsible for its own content storage and retention.
|
data/docs/contributing.md
DELETED
|
@@ -1,228 +0,0 @@
|
|
|
1
|
-
# Contributing to Diogenes
|
|
2
|
-
|
|
3
|
-
Diogenes is opinionated by design. Before contributing, read the philosophy section of the README and the architecture notes in CLAUDE.md. A PR that conflicts with the core philosophy — even if the code is excellent — will not be merged.
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## What We're Looking For
|
|
8
|
-
|
|
9
|
-
**Good contributions:**
|
|
10
|
-
- New gates that encode a real, defensible constraint
|
|
11
|
-
- Improvements to grounding verification accuracy or the verifier prompt
|
|
12
|
-
- New eval matchers that cover failure modes existing matchers don't
|
|
13
|
-
- Drift detection improvements — staleness scoring, webhook integrations
|
|
14
|
-
- Better failure messages — they should be actionable, not cryptic
|
|
15
|
-
- Dashboard improvements that surface existing data more clearly
|
|
16
|
-
- Bug fixes with accompanying regression tests
|
|
17
|
-
- Documentation that makes the framework clearer
|
|
18
|
-
|
|
19
|
-
**Not a good fit:**
|
|
20
|
-
- LLM client integrations (Diogenes is provider-agnostic by design)
|
|
21
|
-
- RAG pipeline or embedding implementations
|
|
22
|
-
- Gates that can be bypassed with configuration
|
|
23
|
-
- Anything that makes a gate failure a warning instead of an error
|
|
24
|
-
- Database-backed eval golden pairs
|
|
25
|
-
- Authentication for the dashboard
|
|
26
|
-
|
|
27
|
-
If you're unsure whether your idea fits, open a discussion issue before writing code.
|
|
28
|
-
|
|
29
|
-
---
|
|
30
|
-
|
|
31
|
-
## Getting Started
|
|
32
|
-
|
|
33
|
-
```bash
|
|
34
|
-
git clone https://github.com/your-org/diogenes
|
|
35
|
-
cd diogenes
|
|
36
|
-
bundle install
|
|
37
|
-
bundle exec rspec
|
|
38
|
-
```
|
|
39
|
-
|
|
40
|
-
The test suite should pass with no configuration. If it doesn't, open an issue.
|
|
41
|
-
|
|
42
|
-
To run the full suite including Rails integration tests:
|
|
43
|
-
|
|
44
|
-
```bash
|
|
45
|
-
bundle exec rspec
|
|
46
|
-
bundle exec rspec spec/rails
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
---
|
|
50
|
-
|
|
51
|
-
## Contributing to Gates
|
|
52
|
-
|
|
53
|
-
Gates are the core primitive. Adding one is a significant decision — each gate adds cognitive load to every team that adopts the gem. New gates should address a failure mode that existing gates don't cover.
|
|
54
|
-
|
|
55
|
-
### Steps
|
|
56
|
-
|
|
57
|
-
1. Create `lib/diogenes/gates/your_gate.rb` inheriting from `Diogenes::Gates::Base`
|
|
58
|
-
2. Implement `#valid?` and `#failure_message`
|
|
59
|
-
3. Add any cross-gate incompatibilities to `lib/diogenes/gates/compatibility.rb`
|
|
60
|
-
4. Register in `Diogenes::Feature`
|
|
61
|
-
5. Write unit specs in `spec/diogenes/gates/your_gate_spec.rb`
|
|
62
|
-
6. Document in `docs/framework.md` following the existing format
|
|
63
|
-
7. Add to `docs/examples.md` if the gate meaningfully changes a verdict
|
|
64
|
-
|
|
65
|
-
### Gate Design Rules
|
|
66
|
-
|
|
67
|
-
- Gates fail loudly or pass. No warnings, no degraded modes.
|
|
68
|
-
- `#failure_message` must include the feature class name, the gate name, and a plain-English instruction for what to change.
|
|
69
|
-
- Gates are validated at boot. If your gate requires runtime information to validate, reconsider the design.
|
|
70
|
-
- Default configuration should be the most conservative option.
|
|
71
|
-
|
|
72
|
-
### Versioning
|
|
73
|
-
|
|
74
|
-
Adding a new gate incompatibility to the compatibility matrix is a **minor** bump if it catches configurations that were silently wrong. It is a **major** bump if it invalidates configurations that were intentionally valid.
|
|
75
|
-
|
|
76
|
-
---
|
|
77
|
-
|
|
78
|
-
## Contributing to Grounding Verification
|
|
79
|
-
|
|
80
|
-
The grounding verifier has two separate concerns: the prompt and the logic. Keep them separate.
|
|
81
|
-
|
|
82
|
-
### The Prompt (`lib/diogenes/grounding/prompt.rb`)
|
|
83
|
-
|
|
84
|
-
The prompt is versioned independently. When you change it:
|
|
85
|
-
|
|
86
|
-
- Increment `Diogenes::Grounding::Prompt::VERSION`
|
|
87
|
-
- Audit records store the prompt version — old records remain interpretable
|
|
88
|
-
- Add a spec that the new prompt correctly classifies a fixture set of (context, response) pairs as supported/unsupported/contradicted
|
|
89
|
-
|
|
90
|
-
Don't change the prompt to improve performance on one case without running the full fixture set. A prompt that is better on average but worse on contradictions is not an improvement.
|
|
91
|
-
|
|
92
|
-
### The Verifier Logic (`lib/diogenes/grounding/verifier.rb`)
|
|
93
|
-
|
|
94
|
-
The verifier accepts any LLM callable. Tests use a stub:
|
|
95
|
-
|
|
96
|
-
```ruby
|
|
97
|
-
Diogenes::Grounding::Verifier.stub(result: :pass)
|
|
98
|
-
Diogenes::Grounding::Verifier.stub(result: :flag, unsupported: ["claim text"])
|
|
99
|
-
```
|
|
100
|
-
|
|
101
|
-
Never make live LLM calls in specs.
|
|
102
|
-
|
|
103
|
-
### The Result (`lib/diogenes/grounding/result.rb`)
|
|
104
|
-
|
|
105
|
-
`Diogenes::Grounding::Result` is a value object. Adding fields is a minor bump. Removing or renaming fields is a major bump — audit records store serialized results.
|
|
106
|
-
|
|
107
|
-
---
|
|
108
|
-
|
|
109
|
-
## Contributing to Drift Detection
|
|
110
|
-
|
|
111
|
-
### Staleness Scoring (`lib/diogenes/drift/staleness_score.rb`)
|
|
112
|
-
|
|
113
|
-
The staleness score is a pure function of timestamps and diff size. Keep it that way — no database access, no I/O. Specs use fixed timestamps.
|
|
114
|
-
|
|
115
|
-
```ruby
|
|
116
|
-
score = Diogenes::Drift::StalenessScore.calculate(
|
|
117
|
-
source_updated_at: 47.days.ago,
|
|
118
|
-
indexed_at: 89.days.ago,
|
|
119
|
-
diff_size: :major
|
|
120
|
-
)
|
|
121
|
-
```
|
|
122
|
-
|
|
123
|
-
### Re-index Jobs
|
|
124
|
-
|
|
125
|
-
Diogenes ships a base job class. The host app subclasses it with embedding logic. Never add embedding implementation to the base class.
|
|
126
|
-
|
|
127
|
-
```ruby
|
|
128
|
-
# What we ship
|
|
129
|
-
class Diogenes::Drift::ReindexJob < ApplicationJob
|
|
130
|
-
def perform(document_id)
|
|
131
|
-
raise NotImplementedError, "Subclass with your embedding logic"
|
|
132
|
-
end
|
|
133
|
-
end
|
|
134
|
-
|
|
135
|
-
# What the host app writes
|
|
136
|
-
class ReindexDocumentJob < Diogenes::Drift::ReindexJob
|
|
137
|
-
def perform(document_id)
|
|
138
|
-
# fetch, embed, store
|
|
139
|
-
end
|
|
140
|
-
end
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
### Webhooks
|
|
144
|
-
|
|
145
|
-
Drift alert webhooks send a standard payload. If you're adding webhook support for a new event type, use the existing payload shape and add a `type` field. Don't introduce a new shape.
|
|
146
|
-
|
|
147
|
-
---
|
|
148
|
-
|
|
149
|
-
## Contributing to the Eval Runner
|
|
150
|
-
|
|
151
|
-
### Adding Matchers (`lib/diogenes/evals/matchers.rb`)
|
|
152
|
-
|
|
153
|
-
Matchers are composable predicates. A new matcher should:
|
|
154
|
-
|
|
155
|
-
- Accept a response struct and return true/false
|
|
156
|
-
- Have a clear, descriptive failure message when it returns false
|
|
157
|
-
- Be usable inside `all_of` and `one_of`
|
|
158
|
-
- Be tested against fixture responses, never live LLM calls
|
|
159
|
-
|
|
160
|
-
```ruby
|
|
161
|
-
module Diogenes
|
|
162
|
-
module Evals
|
|
163
|
-
module Matchers
|
|
164
|
-
def your_matcher(arg)
|
|
165
|
-
->(result) {
|
|
166
|
-
# evaluate result, return true/false
|
|
167
|
-
}
|
|
168
|
-
end
|
|
169
|
-
end
|
|
170
|
-
end
|
|
171
|
-
end
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
### Golden Pairs Live in Code
|
|
175
|
-
|
|
176
|
-
Don't add database storage for golden pairs. They are version-controlled files. If a contributor wants to propose database-backed pairs, the answer is no — visibility in code review is a feature, not a limitation.
|
|
177
|
-
|
|
178
|
-
### Regression Detection
|
|
179
|
-
|
|
180
|
-
The regression detector (`lib/diogenes/evals/regression.rb`) stores the last passing response and diffs it against the first failing one. If you're improving diff quality, use the fixture set in `spec/fixtures/evals/regressions/`. Don't introduce a dependency on a diff library — the current implementation is intentionally minimal.
|
|
181
|
-
|
|
182
|
-
---
|
|
183
|
-
|
|
184
|
-
## Contributing to the Dashboard
|
|
185
|
-
|
|
186
|
-
The dashboard engine has no authentication and no CSS framework dependency. Keep it that way.
|
|
187
|
-
|
|
188
|
-
### Adding a New View
|
|
189
|
-
|
|
190
|
-
Views use ERB. No ViewComponent, no Stimulus, no Turbo (unless it's already a dependency by the time you're reading this — check the gemspec). Interactivity should be achievable with standard Rails UJS or vanilla JS.
|
|
191
|
-
|
|
192
|
-
### Adding a New Route
|
|
193
|
-
|
|
194
|
-
Add to `lib/diogenes/engine/config/routes.rb`. Follow the existing namespace pattern. Every new route needs a corresponding controller action and view, even if the view is a stub.
|
|
195
|
-
|
|
196
|
-
### What the Dashboard Must Never Do
|
|
197
|
-
|
|
198
|
-
- Make LLM calls
|
|
199
|
-
- Trigger re-indexing without an explicit user action
|
|
200
|
-
- Display raw query or response content (hashes only — PII protection)
|
|
201
|
-
- Require specific authentication middleware
|
|
202
|
-
|
|
203
|
-
---
|
|
204
|
-
|
|
205
|
-
## Pull Request Guidelines
|
|
206
|
-
|
|
207
|
-
- One logical change per PR
|
|
208
|
-
- Tests for all new behaviour
|
|
209
|
-
- Updated documentation if you're changing or adding functionality
|
|
210
|
-
- A clear description of *why*, not just *what*
|
|
211
|
-
|
|
212
|
-
PRs that add code without updating relevant docs will be sent back.
|
|
213
|
-
|
|
214
|
-
---
|
|
215
|
-
|
|
216
|
-
## Versioning
|
|
217
|
-
|
|
218
|
-
Diogenes follows semantic versioning strictly.
|
|
219
|
-
|
|
220
|
-
- **Patch:** bug fixes, documentation, internal refactors
|
|
221
|
-
- **Minor:** new gates, new matchers, new dashboard views, non-breaking additions
|
|
222
|
-
- **Major:** changes to gate semantics, changes to audit record shape, removal of features, breaking DSL changes
|
|
223
|
-
|
|
224
|
-
---
|
|
225
|
-
|
|
226
|
-
## Code of Conduct
|
|
227
|
-
|
|
228
|
-
Be direct. Be specific. Assume good intent. If you're reviewing a PR, explain your reasoning — "this conflicts with the philosophy" is not sufficient without explaining which part and why.
|