loreguard-cli 0.12.2__tar.gz → 0.14.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/.github/workflows/release.yml +28 -8
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/PKG-INFO +1 -1
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/pyproject.toml +1 -1
- loreguard_cli-0.14.0rc1/sdk/API.md +249 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/sdk/python/loreguard_sdk.py +59 -3
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/cli.py +45 -2
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/config.py +40 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/http_server.py +82 -6
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/llama_server.py +22 -2
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/llm.py +16 -15
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/main.py +5 -3
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/widgets/banner.py +3 -2
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tunnel.py +8 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/.claude/skills/llama-cpp-troubleshooting/SKILL.md +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/.env.example +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/.gitignore +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/LICENSE +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/README.md +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/THIRD_PARTY_NOTICES.md +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/scripts/build.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/sdk/csharp/LoreguardSDK.cs +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/sdk/gdscript/LoreguardSDK.gd +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/sdk/javascript/loreguard-sdk.js +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/__init__.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/__main__.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/chunk_detector.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/dialogue_act_classifier.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/hf_discovery.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/intent_classifier.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/models_registry.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/nli.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/npc_chat.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/runtime.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/steam.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/term_ui.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/__init__.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/app.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/modals/__init__.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/modals/auth_menu.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/modals/npc_chat.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/modals/token_input.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/modals/unified_palette.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/screens/__init__.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/screens/auth.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/screens/main.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/screens/model_select.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/screens/nli_setup.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/screens/running.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/styles.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/widgets/__init__.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/widgets/footer.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/widgets/hardware_info.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/widgets/npc_chat.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/widgets/server_monitor.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/tui/widgets/status_panel.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/src/wizard.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/templates/llama31-no-tools.jinja +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/tests/test_nli_hhem.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/tests/test_websocket_timeout.py +0 -0
- {loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/uv.lock +0 -0
|
@@ -7,8 +7,21 @@ on:
|
|
|
7
7
|
workflow_dispatch:
|
|
8
8
|
|
|
9
9
|
jobs:
|
|
10
|
-
build
|
|
11
|
-
|
|
10
|
+
build:
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
include:
|
|
14
|
+
- os: windows-latest
|
|
15
|
+
artifact-name: loreguard-windows-amd64.exe
|
|
16
|
+
build-output: dist/loreguard.exe
|
|
17
|
+
- os: macos-latest
|
|
18
|
+
artifact-name: loreguard-darwin-arm64
|
|
19
|
+
build-output: dist/loreguard
|
|
20
|
+
- os: ubuntu-latest
|
|
21
|
+
artifact-name: loreguard-linux-amd64
|
|
22
|
+
build-output: dist/loreguard
|
|
23
|
+
|
|
24
|
+
runs-on: ${{ matrix.os }}
|
|
12
25
|
|
|
13
26
|
steps:
|
|
14
27
|
- uses: actions/checkout@v4
|
|
@@ -26,30 +39,37 @@ jobs:
|
|
|
26
39
|
- name: Build with PyInstaller
|
|
27
40
|
run: python scripts/build.py
|
|
28
41
|
|
|
42
|
+
- name: Rename artifact
|
|
43
|
+
shell: bash
|
|
44
|
+
run: mv ${{ matrix.build-output }} dist/${{ matrix.artifact-name }}
|
|
45
|
+
|
|
29
46
|
- name: Upload artifact
|
|
30
47
|
uses: actions/upload-artifact@v4
|
|
31
48
|
with:
|
|
32
|
-
name:
|
|
33
|
-
path: dist
|
|
49
|
+
name: ${{ matrix.artifact-name }}
|
|
50
|
+
path: dist/${{ matrix.artifact-name }}
|
|
34
51
|
|
|
35
52
|
release:
|
|
36
|
-
needs: build
|
|
53
|
+
needs: build
|
|
37
54
|
runs-on: ubuntu-latest
|
|
38
55
|
if: startsWith(github.ref, 'refs/tags/')
|
|
39
56
|
permissions:
|
|
40
57
|
contents: write
|
|
41
58
|
|
|
42
59
|
steps:
|
|
43
|
-
- name: Download
|
|
60
|
+
- name: Download all artifacts
|
|
44
61
|
uses: actions/download-artifact@v4
|
|
45
62
|
with:
|
|
46
|
-
name: loreguard-windows.exe
|
|
47
63
|
path: artifacts
|
|
64
|
+
merge-multiple: true
|
|
48
65
|
|
|
49
66
|
- name: Create Release
|
|
50
67
|
uses: softprops/action-gh-release@v1
|
|
51
68
|
with:
|
|
52
|
-
files:
|
|
69
|
+
files: |
|
|
70
|
+
artifacts/loreguard-windows-amd64.exe
|
|
71
|
+
artifacts/loreguard-darwin-arm64
|
|
72
|
+
artifacts/loreguard-linux-amd64
|
|
53
73
|
generate_release_notes: true
|
|
54
74
|
env:
|
|
55
75
|
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loreguard-cli
|
|
3
|
-
Version: 0.12.2
|
|
3
|
+
Version: 0.14.0rc1
|
|
4
4
|
Summary: Local inference client for Loreguard NPCs
|
|
5
5
|
Project-URL: Homepage, https://loreguard.com
|
|
6
6
|
Project-URL: Documentation, https://github.com/beyond-logic-labs/loreguard-cli#readme
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
# Loreguard Client API Reference
|
|
2
|
+
|
|
3
|
+
Loreguard Client exposes a local HTTP API that any game can call. The server runs on `127.0.0.1` with a dynamic port written to `runtime.json`.
|
|
4
|
+
|
|
5
|
+
## Service Discovery
|
|
6
|
+
|
|
7
|
+
On startup, loreguard-client writes a `runtime.json` file to a platform-specific location:
|
|
8
|
+
|
|
9
|
+
| Platform | Path |
|
|
10
|
+
|----------|------|
|
|
11
|
+
| macOS | `~/Library/Application Support/loreguard/runtime.json` |
|
|
12
|
+
| Linux | `~/.local/share/loreguard/runtime.json` (or `$XDG_DATA_HOME/loreguard/`) |
|
|
13
|
+
| Windows | `%APPDATA%/loreguard/runtime.json` |
|
|
14
|
+
|
|
15
|
+
```json
|
|
16
|
+
{
|
|
17
|
+
"port": 52341,
|
|
18
|
+
"pid": 12345,
|
|
19
|
+
"url": "http://127.0.0.1:52341",
|
|
20
|
+
"started_at": "2026-02-20T10:30:00Z",
|
|
21
|
+
"version": "0.7.0",
|
|
22
|
+
"backend_connected": true
|
|
23
|
+
}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Read this file to discover the port, then make HTTP calls to `http://127.0.0.1:{port}`.
|
|
27
|
+
|
|
28
|
+
---
|
|
29
|
+
|
|
30
|
+
## Endpoints
|
|
31
|
+
|
|
32
|
+
### `GET /health`
|
|
33
|
+
|
|
34
|
+
Check if loreguard-client is running and connected to the backend. The response may also include a `capabilities` field when the backend tunnel exposes one.
|
|
35
|
+
|
|
36
|
+
**Response:**
|
|
37
|
+
|
|
38
|
+
```json
|
|
39
|
+
{
|
|
40
|
+
"status": "ok",
|
|
41
|
+
"backend_connected": true
|
|
42
|
+
}
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Returns `500` with a body of the form `{"status": "error", "error": "<message>"}` if the server is in an error state.
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
### `GET /api/capabilities`
|
|
50
|
+
|
|
51
|
+
Discover what features this bundle supports. Use this to feature-detect before sending requests with optional fields.
|
|
52
|
+
|
|
53
|
+
**Response:**
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
{
|
|
57
|
+
"streaming": true,
|
|
58
|
+
"chunk_modes": ["deberta", "sentence"],
|
|
59
|
+
"manages_history": false
|
|
60
|
+
}
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
| Field | Type | Description |
|
|
64
|
+
|-------|------|-------------|
|
|
65
|
+
| `streaming` | bool | Whether SSE streaming is supported |
|
|
66
|
+
| `chunk_modes` | string[] | Available chunk detection modes (e.g. `"deberta"`, `"sentence"`) |
|
|
67
|
+
| `manages_history` | bool | Whether the bundle can manage conversation history internally |
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
### `POST /api/chat`
|
|
72
|
+
|
|
73
|
+
Send a player message and get an NPC response. Supports both blocking JSON and SSE streaming.
|
|
74
|
+
|
|
75
|
+
**Request Headers:**
|
|
76
|
+
|
|
77
|
+
| Header | Value | Effect |
|
|
78
|
+
|--------|-------|--------|
|
|
79
|
+
| `Content-Type` | `application/json` | Required |
|
|
80
|
+
| `Accept` | `text/event-stream` | Enables SSE streaming (optional) |
|
|
81
|
+
| `Authorization` | `Bearer <token>` | API token for character access (optional) |
|
|
82
|
+
|
|
83
|
+
**Request Body:**
|
|
84
|
+
|
|
85
|
+
```json
|
|
86
|
+
{
|
|
87
|
+
"character_id": "merchant-npc",
|
|
88
|
+
"message": "What do you have for sale?",
|
|
89
|
+
"player_handle": "player1",
|
|
90
|
+
"player_id": "uuid-here",
|
|
91
|
+
"current_context": "player is in the marketplace",
|
|
92
|
+
"scenario_id": "main-quest",
|
|
93
|
+
"history": [
|
|
94
|
+
{"role": "user", "content": "Hello!"},
|
|
95
|
+
{"role": "assistant", "content": "Welcome, traveler!"}
|
|
96
|
+
],
|
|
97
|
+
"chunk_mode": "deberta",
|
|
98
|
+
"manage_history": false,
|
|
99
|
+
"max_speech_tokens": 150,
|
|
100
|
+
"verbose": false,
|
|
101
|
+
"enable_thinking": false
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
| Field | Type | Required | Description |
|
|
106
|
+
|-------|------|----------|-------------|
|
|
107
|
+
| `character_id` | string | Yes | NPC identifier |
|
|
108
|
+
| `message` | string | Yes | Player's message |
|
|
109
|
+
| `player_handle` | string | No | Player's display name |
|
|
110
|
+
| `player_id` | string | No | Player's unique ID for per-player state. If empty, backend uses the developer's owner ID |
|
|
111
|
+
| `current_context` | string | No | Game context (location, situation) |
|
|
112
|
+
| `scenario_id` | string | No | Scenario identifier |
|
|
113
|
+
| `history` | array | No | Conversation history. Omit if `manage_history` is true |
|
|
114
|
+
| `chunk_mode` | string | No | `"deberta"` for ML-based chunk splitting, `"sentence"` for regex sentence splitting, `""` or omit for none |
|
|
115
|
+
| `manage_history` | bool | No | If true, the backend manages conversation history per character+player pair |
|
|
116
|
+
| `max_speech_tokens` | int | No | Maximum tokens in NPC speech (0 = default) |
|
|
117
|
+
| `verbose` | bool | No | Include pipeline pass updates in response (for debugging) |
|
|
118
|
+
| `enable_thinking` | bool | No | Include NPC internal monologue |
|
|
119
|
+
|
|
120
|
+
All field names accept both `snake_case` and `camelCase` (e.g. `character_id` or `characterId`).
|
|
121
|
+
|
|
122
|
+
#### JSON Response (default)
|
|
123
|
+
|
|
124
|
+
When `Accept` is not `text/event-stream`:
|
|
125
|
+
|
|
126
|
+
```json
|
|
127
|
+
{
|
|
128
|
+
"response": "I have potions, swords, and shields. What interests you?",
|
|
129
|
+
"verified": true,
|
|
130
|
+
"citations": ["knowledge/inventory.md:5"],
|
|
131
|
+
"chunks": [
|
|
132
|
+
"I have potions, swords, and shields.",
|
|
133
|
+
"What interests you?"
|
|
134
|
+
],
|
|
135
|
+
"pipeline_trace": []
|
|
136
|
+
}
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
| Field | Type | Description |
|
|
140
|
+
|-------|------|-------------|
|
|
141
|
+
| `response` | string | Full NPC speech |
|
|
142
|
+
| `verified` | bool | Whether NeMo verification passed |
|
|
143
|
+
| `citations` | string[] | Knowledge sources used |
|
|
144
|
+
| `chunks` | string[] | Sentence/chunk boundaries (only present when `chunk_mode` was set) |
|
|
145
|
+
| `pipeline_trace` | array | Pipeline pass details (only present when `verbose` was true) |
|
|
146
|
+
|
|
147
|
+
#### SSE Streaming Response
|
|
148
|
+
|
|
149
|
+
When `Accept: text/event-stream` is set, the response is a stream of Server-Sent Events:
|
|
150
|
+
|
|
151
|
+
```
|
|
152
|
+
event: filler
|
|
153
|
+
data: {"text": "Hmm...", "dialogueAct": "wh-question"}
|
|
154
|
+
|
|
155
|
+
event: pass_update
|
|
156
|
+
data: {"pass": "retrieval", "status": "complete", "latencyMs": 120}
|
|
157
|
+
|
|
158
|
+
event: token
|
|
159
|
+
data: {"t": "I"}
|
|
160
|
+
|
|
161
|
+
event: token
|
|
162
|
+
data: {"t": " have"}
|
|
163
|
+
|
|
164
|
+
event: token
|
|
165
|
+
data: {"t": " potions"}
|
|
166
|
+
|
|
167
|
+
event: done
|
|
168
|
+
data: {"speech": "I have potions...", "verified": true, "citations": [...], "chunks": [...]}
|
|
169
|
+
|
|
170
|
+
event: follow_up
|
|
171
|
+
data: {"speech": "By the way, new stock arrives tomorrow.", ...}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
| Event | Data | Description |
|
|
175
|
+
|-------|------|-------------|
|
|
176
|
+
| `filler` | `{text, dialogueAct}` | Contextual filler message. Sent early (~100ms) before the pipeline completes. Display immediately for perceived responsiveness |
|
|
177
|
+
| `token` | `{t}` | Single token from the LLM. Append to build the response incrementally |
|
|
178
|
+
| `pass_update` | `{pass, status, ...}` | Pipeline pass progress (only when `verbose` is true). For debugging |
|
|
179
|
+
| `done` | `{speech, verified, citations, chunks, ...}` | Final verified response. Contains the same fields as the JSON response |
|
|
180
|
+
| `follow_up` | `{speech, ...}` | Unsolicited follow-up message from the NPC (may arrive after `done`) |
|
|
181
|
+
| `error` | `{error}` | Error message. Stream ends after this |
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Integration Patterns
|
|
186
|
+
|
|
187
|
+
### Minimal (any language)
|
|
188
|
+
|
|
189
|
+
Just POST JSON and read the response:
|
|
190
|
+
|
|
191
|
+
```
|
|
192
|
+
POST http://127.0.0.1:{port}/api/chat
|
|
193
|
+
Content-Type: application/json
|
|
194
|
+
|
|
195
|
+
{"character_id": "merchant", "message": "Hello!"}
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
### With Chunks (for staggered display)
|
|
199
|
+
|
|
200
|
+
Request chunked responses and display each chunk separately with delays:
|
|
201
|
+
|
|
202
|
+
```json
|
|
203
|
+
{
|
|
204
|
+
"character_id": "merchant",
|
|
205
|
+
"message": "Hello!",
|
|
206
|
+
"chunk_mode": "deberta"
|
|
207
|
+
}
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
The response includes a `chunks` array. Display each chunk as a separate message/bubble with ~500-700ms delays between them.
|
|
211
|
+
|
|
212
|
+
### With Server-Managed History
|
|
213
|
+
|
|
214
|
+
Let the backend track conversation history so your game doesn't have to:
|
|
215
|
+
|
|
216
|
+
```json
|
|
217
|
+
{
|
|
218
|
+
"character_id": "merchant",
|
|
219
|
+
"message": "Hello!",
|
|
220
|
+
"player_id": "player-uuid",
|
|
221
|
+
"manage_history": true
|
|
222
|
+
}
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
No `history` field needed. The backend maintains a rolling conversation buffer per `(character_id, player_id)` pair.
|
|
226
|
+
|
|
227
|
+
### With Streaming + Filler
|
|
228
|
+
|
|
229
|
+
For games with real-time UX (speech bubbles, typing indicators):
|
|
230
|
+
|
|
231
|
+
1. Set `Accept: text/event-stream`
|
|
232
|
+
2. On `filler` event: show typing indicator or filler text ("Hmm...")
|
|
233
|
+
3. On `token` events: append tokens to display
|
|
234
|
+
4. On `done` event: finalize the response, show verified status
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## SDK Files
|
|
239
|
+
|
|
240
|
+
Pre-built SDK files for common engines are in the `sdk/` directory. Copy the relevant file into your project:
|
|
241
|
+
|
|
242
|
+
| Engine | File | Notes |
|
|
243
|
+
|--------|------|-------|
|
|
244
|
+
| Python | `sdk/python/loreguard_sdk.py` | Requires `httpx`. Async + sync support |
|
|
245
|
+
| JavaScript / Electron | `sdk/javascript/loreguard-sdk.js` | Node.js CommonJS. Uses `fetch` |
|
|
246
|
+
| Unity / C# | `sdk/csharp/LoreguardSDK.cs` | Uses `UnityWebRequest`. Coroutine-based |
|
|
247
|
+
| Godot 4 | `sdk/gdscript/LoreguardSDK.gd` | Signal-based. Supports streaming |
|
|
248
|
+
|
|
249
|
+
These are thin HTTP wrappers around the endpoints documented above. You can also call the API directly from any language that supports HTTP.
|
|
@@ -145,7 +145,12 @@ async def chat(
|
|
|
145
145
|
character_id: str,
|
|
146
146
|
message: str,
|
|
147
147
|
player_handle: str = "",
|
|
148
|
+
player_id: str = "",
|
|
148
149
|
current_context: str = "",
|
|
150
|
+
history: list[dict[str, Any]] | None = None,
|
|
151
|
+
chunk_mode: str = "",
|
|
152
|
+
manage_history: bool = False,
|
|
153
|
+
max_speech_tokens: int = 0,
|
|
149
154
|
stream: bool = True,
|
|
150
155
|
) -> AsyncIterator[dict[str, Any]]:
|
|
151
156
|
"""Chat with an NPC via loreguard-client.
|
|
@@ -154,12 +159,17 @@ async def chat(
|
|
|
154
159
|
character_id: The NPC's ID
|
|
155
160
|
message: Player's message to the NPC
|
|
156
161
|
player_handle: Player's display name (optional)
|
|
162
|
+
player_id: Player's unique ID for per-player state (optional)
|
|
157
163
|
current_context: Game context like "in a dark cave" (optional)
|
|
164
|
+
history: Conversation history as [{"role": "user"|"assistant", "content": "..."}] (optional)
|
|
165
|
+
chunk_mode: Chunk detection mode — "deberta" for ML-based, "sentence" for regex sentence splitting, "" for none (optional)
|
|
166
|
+
manage_history: If True, bundle manages history internally per character+player (optional)
|
|
167
|
+
max_speech_tokens: Max tokens in NPC speech (optional, 0 = default)
|
|
158
168
|
stream: If True, yields tokens as they arrive. If False, yields final response.
|
|
159
169
|
|
|
160
170
|
Yields:
|
|
161
171
|
For streaming: {"t": "token"} for each token, then {"speech": "...", "verified": True, ...}
|
|
162
|
-
For non-streaming: Single dict with complete response
|
|
172
|
+
For non-streaming: Single dict with complete response (includes "chunks" if chunk_mode set)
|
|
163
173
|
|
|
164
174
|
Raises:
|
|
165
175
|
RuntimeError: If loreguard-client is not running
|
|
@@ -181,12 +191,22 @@ async def chat(
|
|
|
181
191
|
if stream:
|
|
182
192
|
headers["Accept"] = "text/event-stream"
|
|
183
193
|
|
|
184
|
-
body = {
|
|
194
|
+
body: dict[str, Any] = {
|
|
185
195
|
"character_id": character_id,
|
|
186
196
|
"message": message,
|
|
187
197
|
"player_handle": player_handle,
|
|
188
198
|
"current_context": current_context,
|
|
189
199
|
}
|
|
200
|
+
if player_id:
|
|
201
|
+
body["player_id"] = player_id
|
|
202
|
+
if history:
|
|
203
|
+
body["history"] = history
|
|
204
|
+
if chunk_mode:
|
|
205
|
+
body["chunk_mode"] = chunk_mode
|
|
206
|
+
if manage_history:
|
|
207
|
+
body["manage_history"] = True
|
|
208
|
+
if max_speech_tokens > 0:
|
|
209
|
+
body["max_speech_tokens"] = max_speech_tokens
|
|
190
210
|
|
|
191
211
|
async with httpx.AsyncClient() as client:
|
|
192
212
|
if stream:
|
|
@@ -214,6 +234,26 @@ async def chat(
|
|
|
214
234
|
yield response.json()
|
|
215
235
|
|
|
216
236
|
|
|
237
|
+
async def get_capabilities() -> dict[str, Any]:
|
|
238
|
+
"""Get bundle capabilities.
|
|
239
|
+
|
|
240
|
+
Returns:
|
|
241
|
+
Capabilities dict with streaming, chunk_modes, manages_history.
|
|
242
|
+
|
|
243
|
+
Raises:
|
|
244
|
+
RuntimeError: If loreguard-client is not running
|
|
245
|
+
ImportError: If httpx is not installed
|
|
246
|
+
"""
|
|
247
|
+
if httpx is None:
|
|
248
|
+
raise ImportError("httpx is required. Install with: pip install httpx")
|
|
249
|
+
|
|
250
|
+
url = f"{get_base_url()}/api/capabilities"
|
|
251
|
+
async with httpx.AsyncClient() as client:
|
|
252
|
+
response = await client.get(url, timeout=5.0)
|
|
253
|
+
response.raise_for_status()
|
|
254
|
+
return response.json()
|
|
255
|
+
|
|
256
|
+
|
|
217
257
|
async def health_check() -> dict[str, Any]:
|
|
218
258
|
"""Check loreguard-client health.
|
|
219
259
|
|
|
@@ -239,7 +279,12 @@ def chat_sync(
|
|
|
239
279
|
character_id: str,
|
|
240
280
|
message: str,
|
|
241
281
|
player_handle: str = "",
|
|
282
|
+
player_id: str = "",
|
|
242
283
|
current_context: str = "",
|
|
284
|
+
history: list[dict[str, Any]] | None = None,
|
|
285
|
+
chunk_mode: str = "",
|
|
286
|
+
manage_history: bool = False,
|
|
287
|
+
max_speech_tokens: int = 0,
|
|
243
288
|
) -> dict[str, Any]:
|
|
244
289
|
"""Synchronous chat (non-streaming).
|
|
245
290
|
|
|
@@ -247,17 +292,28 @@ def chat_sync(
|
|
|
247
292
|
|
|
248
293
|
Returns:
|
|
249
294
|
Complete response dict with speech, verified, citations, etc.
|
|
295
|
+
Includes "chunks" list if chunk_mode was set.
|
|
250
296
|
"""
|
|
251
297
|
if httpx is None:
|
|
252
298
|
raise ImportError("httpx is required. Install with: pip install httpx")
|
|
253
299
|
|
|
254
300
|
url = f"{get_base_url()}/api/chat"
|
|
255
|
-
body = {
|
|
301
|
+
body: dict[str, Any] = {
|
|
256
302
|
"character_id": character_id,
|
|
257
303
|
"message": message,
|
|
258
304
|
"player_handle": player_handle,
|
|
259
305
|
"current_context": current_context,
|
|
260
306
|
}
|
|
307
|
+
if player_id:
|
|
308
|
+
body["player_id"] = player_id
|
|
309
|
+
if history:
|
|
310
|
+
body["history"] = history
|
|
311
|
+
if chunk_mode:
|
|
312
|
+
body["chunk_mode"] = chunk_mode
|
|
313
|
+
if manage_history:
|
|
314
|
+
body["manage_history"] = True
|
|
315
|
+
if max_speech_tokens > 0:
|
|
316
|
+
body["max_speech_tokens"] = max_speech_tokens
|
|
261
317
|
|
|
262
318
|
with httpx.Client() as client:
|
|
263
319
|
response = client.post(url, json=body, timeout=120.0)
|
|
@@ -240,12 +240,33 @@ class LoreguardCLI:
|
|
|
240
240
|
try:
|
|
241
241
|
llm_proxy = LLMProxy(f"http://127.0.0.1:{self.port}")
|
|
242
242
|
|
|
243
|
+
# ADR-0027: Load all ML services — the client is the sole provider
|
|
244
|
+
# of NLI, intent, dialogue act, and chunk capabilities.
|
|
245
|
+
# Use resolve_model_path() to prefer pre-shipped models (enterprise bundles).
|
|
246
|
+
from .config import resolve_model_path
|
|
247
|
+
|
|
248
|
+
# Initialize NLI service (HHEM grounding model)
|
|
249
|
+
nli_service = None
|
|
250
|
+
try:
|
|
251
|
+
from .nli import NLIService
|
|
252
|
+
nli_model = resolve_model_path("vectara/hallucination_evaluation_model", "hhem")
|
|
253
|
+
log.info(f"Loading NLI model ({nli_model})...")
|
|
254
|
+
nli_service = NLIService(model_path=nli_model)
|
|
255
|
+
if nli_service.load_model():
|
|
256
|
+
log.info(f"NLI ready (device: {nli_service.device})")
|
|
257
|
+
else:
|
|
258
|
+
log.warning("NLI model failed to load")
|
|
259
|
+
nli_service = None
|
|
260
|
+
except Exception as e:
|
|
261
|
+
log.warning(f"NLI error: {e}")
|
|
262
|
+
|
|
243
263
|
# Initialize intent classifier (ADR-0010)
|
|
244
264
|
intent_classifier = None
|
|
245
265
|
try:
|
|
246
266
|
from .intent_classifier import IntentClassifier
|
|
247
|
-
|
|
248
|
-
|
|
267
|
+
intent_model = resolve_model_path("MoritzLaurer/DeBERTa-v3-large-zeroshot-v2.0", "deberta")
|
|
268
|
+
log.info(f"Loading intent classifier ({intent_model})...")
|
|
269
|
+
intent_classifier = IntentClassifier(model_path=intent_model)
|
|
249
270
|
if intent_classifier.load_model():
|
|
250
271
|
log.info(f"Intent classifier ready (device: {intent_classifier.device})")
|
|
251
272
|
else:
|
|
@@ -254,6 +275,26 @@ class LoreguardCLI:
|
|
|
254
275
|
except Exception as e:
|
|
255
276
|
log.warning(f"Intent classifier error: {e}")
|
|
256
277
|
|
|
278
|
+
# Initialize dialogue act classifier
|
|
279
|
+
dialogue_act_classifier = None
|
|
280
|
+
try:
|
|
281
|
+
from .dialogue_act_classifier import (
|
|
282
|
+
DialogueActClassifier,
|
|
283
|
+
is_dialogue_act_model_available,
|
|
284
|
+
)
|
|
285
|
+
if is_dialogue_act_model_available():
|
|
286
|
+
log.info("Loading dialogue act classifier...")
|
|
287
|
+
dialogue_act_classifier = DialogueActClassifier()
|
|
288
|
+
if dialogue_act_classifier.load_model():
|
|
289
|
+
log.info(f"Dialogue act classifier ready (device: {dialogue_act_classifier.device})")
|
|
290
|
+
else:
|
|
291
|
+
log.warning("Dialogue act classifier failed to load")
|
|
292
|
+
dialogue_act_classifier = None
|
|
293
|
+
else:
|
|
294
|
+
log.info("Dialogue act model not available, skipping")
|
|
295
|
+
except Exception as e:
|
|
296
|
+
log.warning(f"Dialogue act classifier error: {e}")
|
|
297
|
+
|
|
257
298
|
# Initialize chunk detector (ADR-0023) - shares model with intent classifier
|
|
258
299
|
chunk_detector = None
|
|
259
300
|
try:
|
|
@@ -278,7 +319,9 @@ class LoreguardCLI:
|
|
|
278
319
|
worker_id=self.worker_id,
|
|
279
320
|
worker_token=self.token,
|
|
280
321
|
model_id=self.model_path.stem if self.model_path else "unknown",
|
|
322
|
+
nli_service=nli_service,
|
|
281
323
|
intent_classifier=intent_classifier,
|
|
324
|
+
dialogue_act_classifier=dialogue_act_classifier,
|
|
282
325
|
chunk_detector=chunk_detector,
|
|
283
326
|
)
|
|
284
327
|
|
|
@@ -152,9 +152,49 @@ def load_config() -> dict:
|
|
|
152
152
|
|
|
153
153
|
# Context compaction: if True, truncate old messages instead of erroring
|
|
154
154
|
"CONTEXT_COMPACTION": os.getenv("CONTEXT_COMPACTION", "true").lower() == "true",
|
|
155
|
+
|
|
156
|
+
# ADR-0027: Pre-shipped models directory for enterprise bundles.
|
|
157
|
+
# When set, model loaders check this directory first before downloading from HF.
|
|
158
|
+
# Expected subdirectories: hhem/, deberta/, distilbert/, llm/
|
|
159
|
+
"MODELS_DIR": os.getenv("LOREGUARD_MODELS_DIR", ""),
|
|
160
|
+
|
|
161
|
+
# Pre-shipped llama-server binary path (enterprise bundles).
|
|
162
|
+
# When set, skips auto-download and uses this binary directly.
|
|
163
|
+
"LLAMA_SERVER_PATH": os.getenv("LOREGUARD_LLAMA_SERVER_PATH", ""),
|
|
155
164
|
}
|
|
156
165
|
|
|
157
166
|
|
|
167
|
+
def get_models_dir() -> Optional[Path]:
|
|
168
|
+
"""Get the pre-shipped models directory, if configured (ADR-0027).
|
|
169
|
+
|
|
170
|
+
Returns None if not set, meaning models should be auto-downloaded from HF.
|
|
171
|
+
"""
|
|
172
|
+
models_dir = get_config_value("MODELS_DIR")
|
|
173
|
+
if models_dir:
|
|
174
|
+
path = Path(models_dir)
|
|
175
|
+
if path.exists() and path.is_dir():
|
|
176
|
+
return path
|
|
177
|
+
return None
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def resolve_model_path(model_name: str, subdir: str = "") -> str:
|
|
181
|
+
"""Resolve a model path, preferring pre-shipped models over HF downloads.
|
|
182
|
+
|
|
183
|
+
Args:
|
|
184
|
+
model_name: HuggingFace model name (e.g., 'vectara/hallucination_evaluation_model')
|
|
185
|
+
subdir: Subdirectory within MODELS_DIR to check (e.g., 'hhem', 'deberta')
|
|
186
|
+
|
|
187
|
+
Returns:
|
|
188
|
+
Local path if pre-shipped model found, otherwise the original HF model name.
|
|
189
|
+
"""
|
|
190
|
+
models_dir = get_models_dir()
|
|
191
|
+
if models_dir and subdir:
|
|
192
|
+
local_path = models_dir / subdir
|
|
193
|
+
if local_path.exists() and any(local_path.iterdir()):
|
|
194
|
+
return str(local_path)
|
|
195
|
+
return model_name
|
|
196
|
+
|
|
197
|
+
|
|
158
198
|
def get_config_value(key: str, default: Optional[str] = None) -> Optional[str]:
|
|
159
199
|
"""Get a single configuration value."""
|
|
160
200
|
config = load_config()
|
|
@@ -1,12 +1,18 @@
|
|
|
1
|
-
"""Embedded HTTP server for game SDK connections.
|
|
1
|
+
"""Embedded HTTP server for game SDK connections (ADR-0027).
|
|
2
2
|
|
|
3
|
-
This module provides
|
|
4
|
-
|
|
3
|
+
This module provides the official SDK HTTP interface for games to interact
|
|
4
|
+
with loreguard. Games discover this server via runtime.json and call its
|
|
5
|
+
HTTP endpoints:
|
|
6
|
+
|
|
7
|
+
GET /health - Health check with backend connection status
|
|
8
|
+
GET /api/capabilities - Feature discovery (streaming, chunk modes)
|
|
9
|
+
GET /api/characters - List available NPCs (proxied from engine)
|
|
10
|
+
POST /api/chat - Chat with an NPC (streaming SSE or JSON)
|
|
5
11
|
|
|
6
12
|
The server shares the existing tunnel connection instead of creating
|
|
7
|
-
a new one, ensuring a single WebSocket connection per
|
|
13
|
+
a new one, ensuring a single WebSocket connection per worker.
|
|
8
14
|
|
|
9
|
-
Uses
|
|
15
|
+
Uses uvicorn with socket-first binding for race-condition-free port allocation.
|
|
10
16
|
"""
|
|
11
17
|
|
|
12
18
|
import asyncio
|
|
@@ -266,6 +272,8 @@ class EmbeddedHTTPServer:
|
|
|
266
272
|
"verified": data.get("verified", False),
|
|
267
273
|
"citations": data.get("citations", []),
|
|
268
274
|
}
|
|
275
|
+
if data.get("chunks"):
|
|
276
|
+
result["chunks"] = data["chunks"]
|
|
269
277
|
if pipeline_trace:
|
|
270
278
|
result["pipeline_trace"] = pipeline_trace
|
|
271
279
|
return result
|
|
@@ -313,16 +321,78 @@ class EmbeddedHTTPServer:
|
|
|
313
321
|
async def health():
|
|
314
322
|
try:
|
|
315
323
|
backend_connected = server.tunnel.connected if server.tunnel else False
|
|
316
|
-
|
|
324
|
+
result = {
|
|
317
325
|
"status": "ok",
|
|
318
326
|
"backend_connected": backend_connected,
|
|
319
327
|
}
|
|
328
|
+
# Include capabilities for game clients to check readiness
|
|
329
|
+
if server.tunnel and hasattr(server.tunnel, "capabilities"):
|
|
330
|
+
result["capabilities"] = server.tunnel.capabilities
|
|
331
|
+
return result
|
|
320
332
|
except Exception as e:
|
|
321
333
|
return JSONResponse(
|
|
322
334
|
status_code=500,
|
|
323
335
|
content={"status": "error", "error": str(e)},
|
|
324
336
|
)
|
|
325
337
|
|
|
338
|
+
@app.get("/api/capabilities")
|
|
339
|
+
async def capabilities():
|
|
340
|
+
caps = {
|
|
341
|
+
"streaming": True,
|
|
342
|
+
"chunk_modes": ["sentence"],
|
|
343
|
+
"manages_history": False,
|
|
344
|
+
}
|
|
345
|
+
if server.tunnel:
|
|
346
|
+
if getattr(server.tunnel, "chunk_detector", None) and server.tunnel.chunk_detector.is_loaded:
|
|
347
|
+
caps["chunk_modes"].append("deberta")
|
|
348
|
+
return caps
|
|
349
|
+
|
|
350
|
+
@app.get("/api/characters")
|
|
351
|
+
async def characters(request: Request):
|
|
352
|
+
"""Proxy character listing from the engine (ADR-0027).
|
|
353
|
+
|
|
354
|
+
Games discover NPCs through the client SDK, not by calling the engine directly.
|
|
355
|
+
"""
|
|
356
|
+
if not server.tunnel or not server.tunnel.connected:
|
|
357
|
+
return JSONResponse(
|
|
358
|
+
status_code=503,
|
|
359
|
+
content={"error": "Not connected to backend. Start the engine first."},
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
# Derive HTTP base URL from WebSocket URL
|
|
363
|
+
# ws://localhost:8090/workers → http://localhost:8090
|
|
364
|
+
# wss://api.loreguard.com/workers → https://api.loreguard.com
|
|
365
|
+
backend_ws = server.tunnel.backend_url
|
|
366
|
+
if backend_ws.startswith("wss://"):
|
|
367
|
+
base_url = "https://" + backend_ws[6:].split("/")[0]
|
|
368
|
+
elif backend_ws.startswith("ws://"):
|
|
369
|
+
base_url = "http://" + backend_ws[5:].split("/")[0]
|
|
370
|
+
else:
|
|
371
|
+
return JSONResponse(
|
|
372
|
+
status_code=500,
|
|
373
|
+
content={"error": f"Cannot derive HTTP URL from backend: {backend_ws}"},
|
|
374
|
+
)
|
|
375
|
+
|
|
376
|
+
try:
|
|
377
|
+
import httpx
|
|
378
|
+
# Forward Authorization header if present
|
|
379
|
+
headers = {}
|
|
380
|
+
auth_header = request.headers.get("authorization", "")
|
|
381
|
+
if auth_header:
|
|
382
|
+
headers["Authorization"] = auth_header
|
|
383
|
+
|
|
384
|
+
async with httpx.AsyncClient(timeout=10.0) as client:
|
|
385
|
+
resp = await client.get(f"{base_url}/api/characters", headers=headers)
|
|
386
|
+
return JSONResponse(
|
|
387
|
+
status_code=resp.status_code,
|
|
388
|
+
content=resp.json(),
|
|
389
|
+
)
|
|
390
|
+
except Exception as e:
|
|
391
|
+
return JSONResponse(
|
|
392
|
+
status_code=502,
|
|
393
|
+
content={"error": f"Failed to reach engine: {e}"},
|
|
394
|
+
)
|
|
395
|
+
|
|
326
396
|
@app.post("/api/chat")
|
|
327
397
|
async def chat(request: Request):
|
|
328
398
|
if not server.tunnel or not server.tunnel.connected:
|
|
@@ -340,6 +410,8 @@ class EmbeddedHTTPServer:
|
|
|
340
410
|
scenario_id = body.get("scenario_id", body.get("scenarioId", ""))
|
|
341
411
|
enable_thinking = body.get("enable_thinking", body.get("enableThinking", False))
|
|
342
412
|
max_speech_tokens = body.get("max_speech_tokens", body.get("maxSpeechTokens", 0))
|
|
413
|
+
chunk_mode = body.get("chunk_mode", body.get("chunkMode", ""))
|
|
414
|
+
manage_history = body.get("manage_history", body.get("manageHistory", False))
|
|
343
415
|
accept = request.headers.get("accept", "")
|
|
344
416
|
streaming = "text/event-stream" in accept
|
|
345
417
|
|
|
@@ -366,6 +438,8 @@ class EmbeddedHTTPServer:
|
|
|
366
438
|
verbose=body.get("verbose", False),
|
|
367
439
|
api_token=api_token,
|
|
368
440
|
max_speech_tokens=max_speech_tokens,
|
|
441
|
+
chunk_mode=chunk_mode,
|
|
442
|
+
manage_history=manage_history,
|
|
369
443
|
)
|
|
370
444
|
)
|
|
371
445
|
# Wait for the result
|
|
@@ -389,6 +463,8 @@ class EmbeddedHTTPServer:
|
|
|
389
463
|
verbose=body.get("verbose", False),
|
|
390
464
|
api_token=api_token,
|
|
391
465
|
max_speech_tokens=max_speech_tokens,
|
|
466
|
+
chunk_mode=chunk_mode,
|
|
467
|
+
manage_history=manage_history,
|
|
392
468
|
)
|
|
393
469
|
|
|
394
470
|
if streaming:
|
|
@@ -106,7 +106,18 @@ def get_slot_cache_dir() -> Path:
|
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
def get_llama_server_path() -> Path:
|
|
109
|
-
"""Get the path to llama-server binary.
|
|
109
|
+
"""Get the path to llama-server binary.
|
|
110
|
+
|
|
111
|
+
ADR-0027: Checks LOREGUARD_LLAMA_SERVER_PATH first for pre-shipped binaries
|
|
112
|
+
(enterprise bundles), then falls back to the default bin directory.
|
|
113
|
+
"""
|
|
114
|
+
# Check for pre-shipped binary (enterprise bundle)
|
|
115
|
+
override = os.environ.get("LOREGUARD_LLAMA_SERVER_PATH", "")
|
|
116
|
+
if override:
|
|
117
|
+
p = Path(override)
|
|
118
|
+
if p.exists() and p.is_file():
|
|
119
|
+
return p
|
|
120
|
+
|
|
110
121
|
plat = get_platform()
|
|
111
122
|
binary_name = BINARIES[plat]["binary_name"]
|
|
112
123
|
return get_bin_dir() / binary_name
|
|
@@ -129,7 +140,16 @@ def get_installed_version() -> Optional[str]:
|
|
|
129
140
|
|
|
130
141
|
|
|
131
142
|
def is_llama_server_installed() -> bool:
|
|
132
|
-
"""Check if llama-server is installed with the correct version.
|
|
143
|
+
"""Check if llama-server is installed with the correct version.
|
|
144
|
+
|
|
145
|
+
ADR-0027: If LOREGUARD_LLAMA_SERVER_PATH is set and the binary exists,
|
|
146
|
+
always returns True (pre-shipped binary, skip version check).
|
|
147
|
+
"""
|
|
148
|
+
# Pre-shipped binary always counts as installed
|
|
149
|
+
override = os.environ.get("LOREGUARD_LLAMA_SERVER_PATH", "")
|
|
150
|
+
if override and Path(override).exists():
|
|
151
|
+
return True
|
|
152
|
+
|
|
133
153
|
server_path = get_llama_server_path()
|
|
134
154
|
if not (server_path.exists() and server_path.is_file()):
|
|
135
155
|
return False
|
|
@@ -572,25 +572,26 @@ class LLMProxy:
|
|
|
572
572
|
payload["enable_thinking"] = False
|
|
573
573
|
|
|
574
574
|
# Force JSON output if requested
|
|
575
|
-
# Use json_object type with schema field - this is supported in llama.cpp server
|
|
576
|
-
# (server-common.cpp extracts schema from response_format.schema for json_object type)
|
|
577
|
-
# Note: json_schema type has a bug (issue #10732, PR #18963 pending)
|
|
578
575
|
if req.force_json:
|
|
579
|
-
# Merge system prompt into user message for "Content-only" template
|
|
580
|
-
if len(req.messages) >= 2 and req.messages[0].get("role") == "system":
|
|
581
|
-
system_content = req.messages[0]["content"]
|
|
582
|
-
user_content = req.messages[-1]["content"]
|
|
583
|
-
merged = f"INSTRUCTIONS:\n{system_content}\n\nREQUEST:\n{user_content}"
|
|
584
|
-
payload["messages"] = [{"role": "user", "content": merged}]
|
|
585
|
-
logger.debug("JSON MODE: Merged system into user message")
|
|
586
|
-
|
|
587
|
-
# Use json_object with schema for proper constraint enforcement
|
|
588
576
|
if req.json_schema:
|
|
589
|
-
|
|
590
|
-
|
|
577
|
+
# Use top-level json_schema field for grammar-constrained JSON.
|
|
578
|
+
# This bypasses the chat template (which corrupts schemas on Llama 3.1
|
|
579
|
+
# by routing them through function-calling) and goes directly to GBNF
|
|
580
|
+
# grammar conversion.
|
|
581
|
+
payload["json_schema"] = req.json_schema
|
|
582
|
+
logger.info("JSON MODE: top-level json_schema for grammar constraint")
|
|
591
583
|
else:
|
|
584
|
+
# Generic JSON object mode (no schema constraint).
|
|
585
|
+
# response_format changes chat template to "Content-only" which loses
|
|
586
|
+
# system prompt context, so merge system into user message.
|
|
587
|
+
if len(req.messages) >= 2 and req.messages[0].get("role") == "system":
|
|
588
|
+
system_content = req.messages[0]["content"]
|
|
589
|
+
user_content = req.messages[-1]["content"]
|
|
590
|
+
merged = f"INSTRUCTIONS:\n{system_content}\n\nREQUEST:\n{user_content}"
|
|
591
|
+
payload["messages"] = [{"role": "user", "content": merged}]
|
|
592
|
+
logger.debug("JSON MODE: Merged system into user message")
|
|
592
593
|
payload["response_format"] = {"type": "json_object"}
|
|
593
|
-
logger.info(
|
|
594
|
+
logger.info("JSON MODE: response_format=json_object (no schema)")
|
|
594
595
|
|
|
595
596
|
# Use per-request timeout if specified
|
|
596
597
|
timeout = req.timeout or self.default_timeout
|
|
@@ -31,7 +31,7 @@ from rich.console import Console
|
|
|
31
31
|
|
|
32
32
|
from .tunnel import BackendTunnel
|
|
33
33
|
from .llm import LLMProxy
|
|
34
|
-
from .config import get_config_value
|
|
34
|
+
from .config import get_config_value, resolve_model_path
|
|
35
35
|
from .nli import NLIService, is_nli_model_available
|
|
36
36
|
from .intent_classifier import IntentClassifier, is_intent_model_available
|
|
37
37
|
from .dialogue_act_classifier import (
|
|
@@ -91,7 +91,8 @@ async def startup():
|
|
|
91
91
|
enable_nli = os.getenv("LOREGUARD_NLI_ENABLED", "true").lower() == "true"
|
|
92
92
|
if enable_nli:
|
|
93
93
|
console.print("[cyan]Initializing NLI service...[/cyan]")
|
|
94
|
-
|
|
94
|
+
nli_model = resolve_model_path("vectara/hallucination_evaluation_model", "hhem")
|
|
95
|
+
nli_service = NLIService(model_path=nli_model)
|
|
95
96
|
if nli_service.load_model():
|
|
96
97
|
console.print(f"[green]NLI service ready (device: {nli_service.device})[/green]")
|
|
97
98
|
else:
|
|
@@ -105,7 +106,8 @@ async def startup():
|
|
|
105
106
|
enable_intent = os.getenv("LOREGUARD_INTENT_ENABLED", "true").lower() == "true"
|
|
106
107
|
if enable_intent:
|
|
107
108
|
console.print("[cyan]Initializing intent classifier...[/cyan]")
|
|
108
|
-
|
|
109
|
+
intent_model = resolve_model_path("MoritzLaurer/DeBERTa-v3-large-zeroshot-v2.0", "deberta")
|
|
110
|
+
intent_classifier = IntentClassifier(model_path=intent_model)
|
|
109
111
|
if intent_classifier.load_model():
|
|
110
112
|
console.print(f"[green]Intent classifier ready (device: {intent_classifier.device})[/green]")
|
|
111
113
|
else:
|
|
@@ -5,6 +5,7 @@ from rich.text import Text
|
|
|
5
5
|
from rich.style import Style
|
|
6
6
|
|
|
7
7
|
from ..styles import FG_DIM, PINK
|
|
8
|
+
from ...runtime import get_version
|
|
8
9
|
|
|
9
10
|
# Simple stylized logo
|
|
10
11
|
LOGO = r"""
|
|
@@ -43,9 +44,9 @@ class LoreguardBanner(Static):
|
|
|
43
44
|
}
|
|
44
45
|
"""
|
|
45
46
|
|
|
46
|
-
def __init__(self, version: str =
|
|
47
|
+
def __init__(self, version: str = None) -> None:
|
|
47
48
|
super().__init__()
|
|
48
|
-
self._version = version
|
|
49
|
+
self._version = version or get_version()
|
|
49
50
|
|
|
50
51
|
def render(self) -> Text:
|
|
51
52
|
"""Render minimal banner."""
|
|
@@ -468,6 +468,7 @@ class BackendTunnel:
|
|
|
468
468
|
elif msg_type == "pass_update":
|
|
469
469
|
# Pipeline pass update (verbose mode)
|
|
470
470
|
payload = data.get("payload", {})
|
|
471
|
+
self._log(f"[pass_update] received pass={payload.get('pass','?')} name={payload.get('name','?')}", "info")
|
|
471
472
|
if self.on_pass_update:
|
|
472
473
|
self.on_pass_update(payload)
|
|
473
474
|
# Also route to per-request queue for HTTP/SSE clients
|
|
@@ -1388,6 +1389,8 @@ class BackendTunnel:
|
|
|
1388
1389
|
verbose: bool = False,
|
|
1389
1390
|
api_token: str = "",
|
|
1390
1391
|
max_speech_tokens: int = 0,
|
|
1392
|
+
chunk_mode: str = "",
|
|
1393
|
+
manage_history: bool = False,
|
|
1391
1394
|
) -> asyncio.Queue[dict[str, Any]]:
|
|
1392
1395
|
"""Send a chat request to the backend and return a queue for responses.
|
|
1393
1396
|
|
|
@@ -1434,6 +1437,10 @@ class BackendTunnel:
|
|
|
1434
1437
|
# Only include maxSpeechTokens if explicitly set (non-zero)
|
|
1435
1438
|
if max_speech_tokens > 0:
|
|
1436
1439
|
payload["maxSpeechTokens"] = max_speech_tokens
|
|
1440
|
+
if chunk_mode:
|
|
1441
|
+
payload["chunkMode"] = chunk_mode
|
|
1442
|
+
if manage_history:
|
|
1443
|
+
payload["manageHistory"] = True
|
|
1437
1444
|
|
|
1438
1445
|
await self._send({
|
|
1439
1446
|
"id": self._generate_message_id(),
|
|
@@ -1498,6 +1505,7 @@ class BackendTunnel:
|
|
|
1498
1505
|
"type": "done",
|
|
1499
1506
|
"data": {
|
|
1500
1507
|
"speech": payload.get("speech", ""),
|
|
1508
|
+
"chunks": payload.get("chunks"),
|
|
1501
1509
|
"thoughts": payload.get("thoughts", ""),
|
|
1502
1510
|
"citations": payload.get("citations", []),
|
|
1503
1511
|
"verified": payload.get("verified", False),
|
{loreguard_cli-0.12.2 → loreguard_cli-0.14.0rc1}/.claude/skills/llama-cpp-troubleshooting/SKILL.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|