npcpy 1.2.34__py3-none-any.whl → 1.2.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- npcpy/data/audio.py +35 -1
- npcpy/data/load.py +149 -7
- npcpy/data/video.py +72 -0
- npcpy/ft/diff.py +332 -71
- npcpy/gen/image_gen.py +120 -23
- npcpy/gen/ocr.py +187 -0
- npcpy/memory/command_history.py +231 -40
- npcpy/npc_compiler.py +14 -5
- npcpy/serve.py +1206 -547
- {npcpy-1.2.34.dist-info → npcpy-1.2.35.dist-info}/METADATA +1 -1
- {npcpy-1.2.34.dist-info → npcpy-1.2.35.dist-info}/RECORD +14 -13
- {npcpy-1.2.34.dist-info → npcpy-1.2.35.dist-info}/WHEEL +0 -0
- {npcpy-1.2.34.dist-info → npcpy-1.2.35.dist-info}/licenses/LICENSE +0 -0
- {npcpy-1.2.34.dist-info → npcpy-1.2.35.dist-info}/top_level.txt +0 -0
npcpy/gen/ocr.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Utilities for running DeepSeek OCR (via Unsloth) to turn images into text.
|
|
3
|
+
|
|
4
|
+
This is intentionally lightweight: the model is only downloaded/loaded when
|
|
5
|
+
`DeepSeekOCR.run` is called. You can point `model_id` at a local path or a
|
|
6
|
+
Hugging Face repo ID; we default to the public `unsloth/DeepSeek-OCR`.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import os
|
|
12
|
+
import tempfile
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from typing import Optional, Union
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
from PIL import Image
|
|
18
|
+
except ImportError:
|
|
19
|
+
Image = None # Delayed import for lightweight environments
|
|
20
|
+
|
|
21
|
+
ImageInput = Union[str, bytes, "Image.Image"]
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class DeepSeekOCR:
    """Lazy loader/wrapper around the Unsloth DeepSeek OCR vision model.

    Nothing heavy happens at construction time: weights are downloaded by
    `_ensure_weights` and the model is loaded by `_load_model`, both of which
    are triggered on the first call to `run`.  `model_id` may be a Hugging
    Face repo ID or a local path.

    Attributes:
        model_id: Hugging Face repo ID (or local path) for the weights.
        local_dir: On-disk cache directory for the downloaded snapshot.
        load_in_4bit: Load quantized weights (requires bitsandbytes).
        base_size: Default `base_size` forwarded to `model.infer`.
        image_size: Default `image_size` forwarded to `model.infer`.
        crop_mode: Default `crop_mode` forwarded to `model.infer`.
    """

    model_id: str = "unsloth/DeepSeek-OCR"
    # NOTE: expanduser runs once, at class-definition time.
    local_dir: str = os.path.expanduser("~/.npcsh/models/deepseek_ocr")
    load_in_4bit: bool = False
    # Defaults mirror the reference notebook.
    base_size: int = 1024
    image_size: int = 640
    crop_mode: bool = True

    def __post_init__(self) -> None:
        # Populated lazily by _load_model().
        self._model = None
        self._tokenizer = None

    def _ensure_weights(self) -> str:
        """Download weights if they are not already on-disk.

        Returns:
            The local directory containing the model snapshot.

        Raises:
            ImportError: If huggingface_hub is missing and a download is needed.
        """
        # Any non-empty directory is treated as a complete snapshot.
        if os.path.isdir(self.local_dir) and os.listdir(self.local_dir):
            return self.local_dir

        os.makedirs(self.local_dir, exist_ok=True)
        try:
            from huggingface_hub import snapshot_download
        except ImportError as exc:
            raise ImportError(
                "huggingface_hub is required to download DeepSeek OCR weights. "
                "Install with `pip install huggingface_hub` or pre-download manually."
            ) from exc

        snapshot_download(self.model_id, local_dir=self.local_dir)
        return self.local_dir

    def _load_model(self) -> None:
        """Load the Unsloth vision model once (lazy, idempotent)."""
        if self._model is not None and self._tokenizer is not None:
            return

        weights_dir = self._ensure_weights()
        # Quiet unsloth's uninitialized-weights warning unless the user has
        # already configured it explicitly.
        os.environ.setdefault("UNSLOTH_WARN_UNINITIALIZED", "0")

        try:
            from unsloth import FastVisionModel
            from transformers import AutoModel
        except ImportError as exc:
            raise ImportError(
                "unsloth and transformers are required to run DeepSeek OCR. "
                "Install with `pip install unsloth transformers` (and bitsandbytes if using 4bit)."
            ) from exc

        # auto_model=AutoModel + trust_remote_code: the DeepSeek OCR repo
        # ships custom modelling code, so the generic AutoModel path is used.
        self._model, self._tokenizer = FastVisionModel.from_pretrained(
            weights_dir,
            load_in_4bit=self.load_in_4bit,
            auto_model=AutoModel,
            trust_remote_code=True,
            unsloth_force_compile=True,
            use_gradient_checkpointing="unsloth",
        )

    def _prepare_image_file(self, image: ImageInput) -> tuple[str, bool]:
        """Normalize a path/bytes/PIL input to a file path on disk.

        Returns:
            (path, should_cleanup) — should_cleanup is True when a temporary
            file was created that the caller must delete afterwards.

        Raises:
            FileNotFoundError: If a string path does not exist.
            ImportError: If Pillow is needed (bytes/PIL input) but missing.
            TypeError: For unsupported input types.
        """
        if isinstance(image, str):
            if not os.path.exists(image):
                raise FileNotFoundError(f"Image path does not exist: {image}")
            return image, False

        if Image is None:
            raise ImportError("Pillow is required for OCR image handling. Install with `pip install pillow`.")

        if isinstance(image, bytes):
            import io

            pil = Image.open(io.BytesIO(image)).convert("RGB")
        elif isinstance(image, Image.Image):
            pil = image.convert("RGB")
        else:
            raise TypeError(f"Unsupported image input type: {type(image)}")

        # Persist to a temp PNG because model.infer expects a file path.
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
        pil.save(tmp, format="PNG")
        tmp.close()
        return tmp.name, True

    def run(
        self,
        image: ImageInput,
        prompt: str = "<image>\nFree OCR. ",
        output_path: Optional[str] = None,
        save_results: bool = False,
        test_compress: bool = False,
        **kwargs,
    ) -> str:
        """
        Run OCR on an image and return the recognized text.

        Args:
            image: Path, bytes, or PIL Image.
            prompt: Prompt passed to the vision model (keeps the default used
                in the reference notebook).
            output_path: Optional directory for saving debug outputs.
            save_results: If True, Unsloth will save visualization artifacts.
            test_compress: Forwarded to `model.infer`.
            kwargs: Additional overrides for infer (base_size, image_size, etc).

        Returns:
            The stripped OCR text.
        """
        self._load_model()

        image_file, should_cleanup = self._prepare_image_file(image)
        infer_kwargs = {
            "prompt": prompt,
            "image_file": image_file,
            "output_path": output_path or "",
            "base_size": kwargs.pop("base_size", self.base_size),
            "image_size": kwargs.pop("image_size", self.image_size),
            "crop_mode": kwargs.pop("crop_mode", self.crop_mode),
            "save_results": save_results,
            "test_compress": test_compress,
        }
        # BUGFIX: forward any remaining caller overrides to infer.  They were
        # previously dropped silently, despite the documented contract that
        # kwargs are "additional overrides for infer".
        infer_kwargs.update(kwargs)

        try:
            result = self._model.infer(self._tokenizer, **infer_kwargs)
        finally:
            # Clean up temp files created from bytes/PIL inputs.
            if should_cleanup and os.path.exists(image_file):
                try:
                    os.remove(image_file)
                except OSError:
                    pass

        # Unsloth infer returns a dict-like object; stringify for callers.
        if isinstance(result, str):
            return result.strip()
        if isinstance(result, dict) and "text" in result:
            return str(result["text"]).strip()
        return str(result).strip()
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def deepseek_ocr(
    image: ImageInput,
    prompt: str = "<image>\nFree OCR. ",
    model_id: str = "unsloth/DeepSeek-OCR",
    local_dir: Optional[str] = None,
    **kwargs,
) -> str:
    """One-shot convenience wrapper around :class:`DeepSeekOCR`.

    Builds a runner with the reference-notebook defaults (overridable via
    kwargs) and OCRs a single image.

    Example:
        text = deepseek_ocr("invoice.png")
    """
    # Partition kwargs: constructor options first, then run() options.
    quantize = kwargs.pop("load_in_4bit", False)
    base_size = kwargs.pop("base_size", 1024)
    image_size = kwargs.pop("image_size", 640)
    crop_mode = kwargs.pop("crop_mode", True)
    out_dir = kwargs.pop("output_path", None)
    save_results = kwargs.pop("save_results", False)
    test_compress = kwargs.pop("test_compress", False)

    weights_dir = local_dir if local_dir else os.path.expanduser("~/.npcsh/models/deepseek_ocr")

    runner = DeepSeekOCR(
        model_id=model_id,
        local_dir=weights_dir,
        load_in_4bit=quantize,
        base_size=base_size,
        image_size=image_size,
        crop_mode=crop_mode,
    )
    return runner.run(
        image=image,
        prompt=prompt,
        output_path=out_dir,
        save_results=save_results,
        test_compress=test_compress,
        **kwargs,
    )
|
npcpy/memory/command_history.py
CHANGED
|
@@ -405,9 +405,13 @@ def save_kg_to_db(engine: Engine, kg_data: Dict[str, Any], team_name: str, npc_n
|
|
|
405
405
|
def generate_message_id() -> str:
|
|
406
406
|
return str(uuid.uuid4())
|
|
407
407
|
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
from sqlalchemy import event, Table, Column, Integer, String, Text
|
|
411
|
+
from sqlalchemy.orm import mapper
|
|
412
|
+
|
|
408
413
|
class CommandHistory:
|
|
409
414
|
def __init__(self, db: Union[str, Engine] = "~/npcsh_history.db"):
|
|
410
|
-
|
|
411
415
|
if isinstance(db, str):
|
|
412
416
|
self.engine = create_engine_from_path(db)
|
|
413
417
|
self.db_path = db
|
|
@@ -415,15 +419,54 @@ class CommandHistory:
|
|
|
415
419
|
self.engine = db
|
|
416
420
|
self.db_path = str(db.url)
|
|
417
421
|
else:
|
|
418
|
-
raise TypeError(f"Unsupported type
|
|
422
|
+
raise TypeError(f"Unsupported type: {type(db)}")
|
|
419
423
|
|
|
420
424
|
self._initialize_schema()
|
|
421
|
-
|
|
425
|
+
self._setup_execution_triggers()
|
|
426
|
+
self.backfill_execution_tables()
|
|
427
|
+
def backfill_execution_tables(self):
    """Backfill jinx_executions / npc_executions from prior conversation rows.

    Idempotent: `INSERT OR IGNORE` keyed on message_id skips rows that were
    already captured (e.g. by the insert triggers).
    """
    # BUGFIX: `INSERT OR IGNORE` is SQLite-only syntax.  Running it against
    # another dialect (e.g. Postgres) raised at __init__ time; mirror the
    # dialect guard used by _setup_execution_triggers instead.
    if 'sqlite' not in str(self.engine.url):
        return

    with self.engine.begin() as conn:
        # Slash-commands ("/name args") become jinx executions; the jinx
        # name is the text between the leading '/' and the first space.
        conn.execute(text("""
            INSERT OR IGNORE INTO jinx_executions
            (message_id, jinx_name, input, timestamp, npc, team,
             conversation_id)
            SELECT
                message_id,
                SUBSTR(content, 2,
                    CASE
                        WHEN INSTR(SUBSTR(content, 2), ' ') > 0
                        THEN INSTR(SUBSTR(content, 2), ' ') - 1
                        ELSE LENGTH(content) - 1
                    END
                ),
                content,
                timestamp,
                npc,
                team,
                conversation_id
            FROM conversation_history
            WHERE role = 'user' AND content LIKE '/%'
        """))

        # Every user message addressed to an NPC becomes an npc execution.
        conn.execute(text("""
            INSERT OR IGNORE INTO npc_executions
            (message_id, input, timestamp, npc, team, conversation_id,
             model, provider)
            SELECT
                message_id,
                content,
                timestamp,
                npc,
                team,
                conversation_id,
                model,
                provider
            FROM conversation_history
            WHERE role = 'user' AND npc IS NOT NULL
        """))
|
|
422
467
|
def _initialize_schema(self):
|
|
423
|
-
"""Creates all necessary tables."""
|
|
424
468
|
metadata = MetaData()
|
|
425
469
|
|
|
426
|
-
|
|
427
470
|
Table('command_history', metadata,
|
|
428
471
|
Column('id', Integer, primary_key=True, autoincrement=True),
|
|
429
472
|
Column('timestamp', String(50)),
|
|
@@ -433,7 +476,6 @@ class CommandHistory:
|
|
|
433
476
|
Column('location', Text)
|
|
434
477
|
)
|
|
435
478
|
|
|
436
|
-
|
|
437
479
|
Table('conversation_history', metadata,
|
|
438
480
|
Column('id', Integer, primary_key=True, autoincrement=True),
|
|
439
481
|
Column('message_id', String(50), unique=True, nullable=False),
|
|
@@ -448,33 +490,48 @@ class CommandHistory:
|
|
|
448
490
|
Column('team', String(100))
|
|
449
491
|
)
|
|
450
492
|
|
|
451
|
-
|
|
452
493
|
Table('message_attachments', metadata,
|
|
453
494
|
Column('id', Integer, primary_key=True, autoincrement=True),
|
|
454
|
-
Column('message_id', String(50),
|
|
495
|
+
Column('message_id', String(50),
|
|
496
|
+
ForeignKey('conversation_history.message_id',
|
|
497
|
+
ondelete='CASCADE'),
|
|
498
|
+
nullable=False),
|
|
455
499
|
Column('attachment_name', String(255)),
|
|
456
500
|
Column('attachment_type', String(100)),
|
|
457
501
|
Column('attachment_data', LargeBinary),
|
|
458
502
|
Column('attachment_size', Integer),
|
|
459
503
|
Column('upload_timestamp', String(50)),
|
|
460
|
-
Column('file_path', Text)
|
|
504
|
+
Column('file_path', Text)
|
|
505
|
+
)
|
|
506
|
+
|
|
507
|
+
Table('labels', metadata,
|
|
508
|
+
Column('id', Integer, primary_key=True, autoincrement=True),
|
|
509
|
+
Column('entity_type', String(50), nullable=False),
|
|
510
|
+
Column('entity_id', String(100), nullable=False),
|
|
511
|
+
Column('label', String(100), nullable=False),
|
|
512
|
+
Column('metadata', Text),
|
|
513
|
+
Column('created_at', DateTime, default=func.now())
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
Table('jinx_executions', metadata,
|
|
517
|
+
Column('message_id', String(50), primary_key=True),
|
|
518
|
+
Column('jinx_name', String(100)),
|
|
519
|
+
Column('input', Text),
|
|
520
|
+
Column('timestamp', String(50)),
|
|
521
|
+
Column('npc', String(100)),
|
|
522
|
+
Column('team', String(100)),
|
|
523
|
+
Column('conversation_id', String(100))
|
|
461
524
|
)
|
|
462
525
|
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
Column('
|
|
466
|
-
Column('
|
|
467
|
-
Column('
|
|
468
|
-
Column('
|
|
469
|
-
Column('
|
|
470
|
-
Column('
|
|
471
|
-
Column('
|
|
472
|
-
Column('jinx_name', String(100), nullable=False),
|
|
473
|
-
Column('jinx_inputs', Text),
|
|
474
|
-
Column('jinx_output', Text),
|
|
475
|
-
Column('status', String(50), nullable=False),
|
|
476
|
-
Column('error_message', Text),
|
|
477
|
-
Column('duration_ms', Integer)
|
|
526
|
+
Table('npc_executions', metadata,
|
|
527
|
+
Column('message_id', String(50), primary_key=True),
|
|
528
|
+
Column('input', Text),
|
|
529
|
+
Column('timestamp', String(50)),
|
|
530
|
+
Column('npc', String(100)),
|
|
531
|
+
Column('team', String(100)),
|
|
532
|
+
Column('conversation_id', String(100)),
|
|
533
|
+
Column('model', String(100)),
|
|
534
|
+
Column('provider', String(100))
|
|
478
535
|
)
|
|
479
536
|
|
|
480
537
|
Table('memory_lifecycle', metadata,
|
|
@@ -492,30 +549,137 @@ class CommandHistory:
|
|
|
492
549
|
Column('provider', String(100)),
|
|
493
550
|
Column('created_at', DateTime, default=func.now())
|
|
494
551
|
)
|
|
495
|
-
|
|
496
552
|
|
|
497
553
|
metadata.create_all(self.engine, checkfirst=True)
|
|
554
|
+
init_kg_schema(self.engine)
|
|
555
|
+
|
|
556
|
+
def _setup_execution_triggers(self):
    """Install SQLite triggers that mirror new user messages into the
    jinx_executions / npc_executions tables at insert time.

    Only runs on SQLite: the trigger syntax (and INSERT OR IGNORE) is
    dialect-specific, so other engines are left untouched.
    """
    if 'sqlite' in str(self.engine.url):
        with self.engine.begin() as conn:
            # Slash-commands ("/name args") are recorded as jinx executions;
            # the SUBSTR/INSTR expression extracts the jinx name between the
            # leading '/' and the first space (or end of string).
            conn.execute(text("""
                CREATE TRIGGER IF NOT EXISTS populate_jinx_executions
                AFTER INSERT ON conversation_history
                WHEN NEW.role = 'user' AND NEW.content LIKE '/%'
                BEGIN
                    INSERT OR IGNORE INTO jinx_executions
                    (message_id, jinx_name, input, timestamp, npc, team,
                     conversation_id)
                    VALUES (
                        NEW.message_id,
                        SUBSTR(NEW.content, 2,
                            CASE
                                WHEN INSTR(SUBSTR(NEW.content, 2), ' ') > 0
                                THEN INSTR(SUBSTR(NEW.content, 2), ' ') - 1
                                ELSE LENGTH(NEW.content) - 1
                            END
                        ),
                        NEW.content,
                        NEW.timestamp,
                        NEW.npc,
                        NEW.team,
                        NEW.conversation_id
                    );
                END
            """))

            # Any user message that names an NPC is also logged as an NPC
            # execution (model/provider captured for later analysis).
            conn.execute(text("""
                CREATE TRIGGER IF NOT EXISTS populate_npc_executions
                AFTER INSERT ON conversation_history
                WHEN NEW.role = 'user' AND NEW.npc IS NOT NULL
                BEGIN
                    INSERT OR IGNORE INTO npc_executions
                    (message_id, input, timestamp, npc, team,
                     conversation_id, model, provider)
                    VALUES (
                        NEW.message_id,
                        NEW.content,
                        NEW.timestamp,
                        NEW.npc,
                        NEW.team,
                        NEW.conversation_id,
                        NEW.model,
                        NEW.provider
                    );
                END
            """))
|
|
605
|
+
|
|
606
|
+
def get_jinx_executions(self, jinx_name: str = None, limit: int = 1000) -> List[Dict]:
    """Return recorded jinx executions, newest first.

    Args:
        jinx_name: If given, restrict to executions of this jinx; otherwise
            return executions of every jinx.
        limit: Maximum number of rows returned.

    Returns:
        Rows from jinx_executions with any 'message'-scoped label attached.
        NOTE: the LEFT JOIN yields one row per (execution, label) pair, so a
        multi-labelled execution appears more than once.
    """
    # The two previous code paths duplicated the query verbatim except for
    # the WHERE clause; build it once instead.  The filter value itself is
    # still bound as a parameter — only a fixed clause is interpolated.
    params = {"limit": limit}
    where = ""
    if jinx_name:
        where = "WHERE je.jinx_name = :jinx_name"
        params["jinx_name"] = jinx_name

    stmt = f"""
        SELECT je.*, l.label
        FROM jinx_executions je
        LEFT JOIN labels l ON l.entity_type = 'message'
            AND l.entity_id = je.message_id
        {where}
        ORDER BY je.timestamp DESC
        LIMIT :limit
    """
    return self._fetch_all(stmt, params)
|
|
628
|
+
|
|
629
|
+
def get_npc_executions(self, npc_name: str, limit: int = 1000) -> List[Dict]:
    """Return up to `limit` executions recorded for one NPC, newest first.

    Each row carries any 'message'-scoped label attached to it; because of
    the LEFT JOIN, an execution with several labels appears once per label.
    """
    stmt = """
        SELECT ne.*, l.label
        FROM npc_executions ne
        LEFT JOIN labels l ON l.entity_type = 'message'
            AND l.entity_id = ne.message_id
        WHERE ne.npc = :npc_name
        ORDER BY ne.timestamp DESC
        LIMIT :limit
    """
    return self._fetch_all(stmt, {"npc_name": npc_name, "limit": limit})
|
|
640
|
+
|
|
641
|
+
def label_execution(self, message_id: str, label: str):
    """Convenience wrapper: attach `label` to a message-scoped execution."""
    self.add_label('message', message_id, label)
|
|
499
643
|
|
|
644
|
+
def add_label(self, entity_type: str, entity_id: str, label: str, metadata: dict = None):
    """Attach a label to an arbitrary entity (message, conversation, ...).

    Args:
        entity_type: Kind of entity being labelled (e.g. 'message').
        entity_id: Identifier of the entity within its type.
        label: The label string.
        metadata: Optional dict stored as JSON alongside the label.
    """
    # BUGFIX: set created_at explicitly.  The labels table declares
    # `default=func.now()`, but SQLAlchemy Column defaults only apply to
    # Core/ORM inserts — raw textual SQL left created_at NULL, breaking the
    # `ORDER BY created_at DESC` in get_labels().
    stmt = """
        INSERT INTO labels (entity_type, entity_id, label, metadata, created_at)
        VALUES (:entity_type, :entity_id, :label, :metadata, CURRENT_TIMESTAMP)
    """
    with self.engine.begin() as conn:
        conn.execute(text(stmt), {
            "entity_type": entity_type,
            "entity_id": entity_id,
            "label": label,
            # `is not None` so an explicitly-passed empty dict is still
            # serialized instead of being silently dropped.
            "metadata": json.dumps(metadata) if metadata is not None else None
        })
|
|
656
|
+
|
|
657
|
+
def get_labels(self, entity_type: str = None, label: str = None) -> List[Dict]:
    """Fetch label rows, optionally filtered by entity type and/or label,
    newest first."""
    # Collect the active filters; each one doubles as a bind parameter.
    filters = {}
    if entity_type:
        filters["entity_type"] = entity_type
    if label:
        filters["label"] = label

    clauses = [f"{column} = :{column}" for column in filters]
    where = f"WHERE {' AND '.join(clauses)}" if clauses else ""
    stmt = f"SELECT * FROM labels {where} ORDER BY created_at DESC"

    return self._fetch_all(stmt, filters)
|
|
518
672
|
|
|
673
|
+
def get_training_data_by_label(self, label: str = 'training') -> List[Dict]:
    """Fetch labelled entities joined with their conversation messages.

    Only 'message'-type labels join to conversation_history; labels on other
    entity types come back with NULL message columns (LEFT JOIN).
    """
    stmt = """
        SELECT l.entity_type, l.entity_id, l.metadata,
               ch.content, ch.role, ch.npc, ch.conversation_id
        FROM labels l
        LEFT JOIN conversation_history ch ON
            (l.entity_type = 'message' AND l.entity_id = ch.message_id)
        WHERE l.label = :label
    """
    return self._fetch_all(stmt, {"label": label})
|
|
519
683
|
def _execute_returning_id(self, stmt: str, params: Dict = None) -> Optional[int]:
|
|
520
684
|
"""Execute INSERT and return the generated ID"""
|
|
521
685
|
with self.engine.begin() as conn:
|
|
@@ -535,6 +699,7 @@ class CommandHistory:
|
|
|
535
699
|
result = conn.execute(text(stmt), params or {})
|
|
536
700
|
return [dict(row._mapping) for row in result]
|
|
537
701
|
|
|
702
|
+
|
|
538
703
|
def add_command(self, command, subcommands, output, location):
|
|
539
704
|
timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
|
540
705
|
stmt = """
|
|
@@ -1092,6 +1257,32 @@ def start_new_conversation(prepend: str = None) -> str:
|
|
|
1092
1257
|
prepend = 'npcsh'
|
|
1093
1258
|
return f"{prepend}_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
|
1094
1259
|
|
|
1260
|
+
|
|
1261
|
+
def format_memory_context(memory_examples):
    """Build a prompt snippet from approved/rejected memory examples.

    Shows up to five approved memories (preferring the curated final text)
    and up to three rejected ones, followed by a closing instruction.
    Returns "" when there is nothing to show.
    """
    if not memory_examples:
        return ""

    good = memory_examples.get("approved", [])
    bad = memory_examples.get("rejected", [])

    lines = []
    if good:
        lines.append("EXAMPLES OF GOOD MEMORIES:")
        # Prefer the edited final text, falling back to the raw initial one.
        lines.extend(
            f"- {ex.get('final_memory') or ex.get('initial_memory')}"
            for ex in good[:5]
        )
    if bad:
        lines.append("\nEXAMPLES OF POOR MEMORIES TO AVOID:")
        lines.extend(f"- {ex.get('initial_memory')}" for ex in bad[:3])

    if not lines:
        return ""

    lines.append("\nLearn from these examples to generate similar high-quality memories.")
    return "\n".join(lines)
|
|
1095
1286
|
def save_conversation_message(
|
|
1096
1287
|
command_history: CommandHistory,
|
|
1097
1288
|
conversation_id: str,
|
npcpy/npc_compiler.py
CHANGED
|
@@ -20,7 +20,8 @@ from sqlalchemy import create_engine, text
|
|
|
20
20
|
import npcpy as npy
|
|
21
21
|
from npcpy.llm_funcs import DEFAULT_ACTION_SPACE
|
|
22
22
|
from npcpy.tools import auto_tools
|
|
23
|
-
|
|
23
|
+
import math
|
|
24
|
+
import random
|
|
24
25
|
from npcpy.npc_sysenv import (
|
|
25
26
|
ensure_dirs_exist,
|
|
26
27
|
init_db_tables,
|
|
@@ -259,16 +260,18 @@ class Jinx:
|
|
|
259
260
|
self._load_from_data(jinx_data)
|
|
260
261
|
else:
|
|
261
262
|
raise ValueError("Either jinx_data or jinx_path must be provided")
|
|
262
|
-
|
|
263
|
+
|
|
264
|
+
# Keep a copy for macro expansion, but retain the executable steps by default
|
|
263
265
|
self._raw_steps = list(self.steps)
|
|
264
|
-
self.steps =
|
|
265
|
-
|
|
266
|
+
self.steps = list(self._raw_steps)
|
|
266
267
|
def _load_from_file(self, path):
    """Load a jinx definition from a YAML file at `path`.

    Raises:
        ValueError: If the file is missing, empty, or fails to parse
            (load_yaml_file returning a falsy value).
    """
    jinx_data = load_yaml_file(path)
    if not jinx_data:
        raise ValueError(f"Failed to load jinx from {path}")
    # Remember the origin file so the jinx can be reloaded or referenced later.
    self._source_path = path
    self._load_from_data(jinx_data)
|
|
271
273
|
|
|
274
|
+
|
|
272
275
|
def _load_from_data(self, jinx_data):
|
|
273
276
|
if not jinx_data or not isinstance(jinx_data, dict):
|
|
274
277
|
raise ValueError("Invalid jinx data provided")
|
|
@@ -281,6 +284,7 @@ class Jinx:
|
|
|
281
284
|
self.description = jinx_data.get("description", "")
|
|
282
285
|
self.npc = jinx_data.get("npc")
|
|
283
286
|
self.steps = jinx_data.get("steps", [])
|
|
287
|
+
self._source_path = jinx_data.get("_source_path", None)
|
|
284
288
|
|
|
285
289
|
def render_first_pass(
|
|
286
290
|
self,
|
|
@@ -450,6 +454,10 @@ class Jinx:
|
|
|
450
454
|
"__builtins__": __builtins__,
|
|
451
455
|
"npc": active_npc,
|
|
452
456
|
"context": context,
|
|
457
|
+
"math": math,
|
|
458
|
+
"random": random,
|
|
459
|
+
"datetime": datetime,
|
|
460
|
+
"Image": Image,
|
|
453
461
|
"pd": pd,
|
|
454
462
|
"plt": plt,
|
|
455
463
|
"sys": sys,
|
|
@@ -618,8 +626,9 @@ def get_npc_action_space(npc=None, team=None):
|
|
|
618
626
|
if npc:
|
|
619
627
|
core_tools = [
|
|
620
628
|
npc.think_step_by_step,
|
|
621
|
-
npc.write_code
|
|
622
629
|
]
|
|
630
|
+
if hasattr(npc, "write_code"):
|
|
631
|
+
core_tools.append(npc.write_code)
|
|
623
632
|
|
|
624
633
|
if npc.command_history:
|
|
625
634
|
core_tools.extend([
|