hindsight-api 0.0.14__py3-none-any.whl → 0.0.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/cli.py +128 -0
- hindsight_api/engine/retain/fact_extraction.py +24 -2
- hindsight_api/web/server.py +1 -1
- {hindsight_api-0.0.14.dist-info → hindsight_api-0.0.16.dist-info}/METADATA +1 -1
- {hindsight_api-0.0.14.dist-info → hindsight_api-0.0.16.dist-info}/RECORD +7 -5
- hindsight_api-0.0.16.dist-info/entry_points.txt +2 -0
- {hindsight_api-0.0.14.dist-info → hindsight_api-0.0.16.dist-info}/WHEEL +0 -0
hindsight_api/cli.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command-line interface for Hindsight API.
|
|
3
|
+
|
|
4
|
+
Run the server with:
|
|
5
|
+
hindsight-api
|
|
6
|
+
|
|
7
|
+
Stop with Ctrl+C.
|
|
8
|
+
"""
|
|
9
|
+
import argparse
|
|
10
|
+
import asyncio
|
|
11
|
+
import atexit
|
|
12
|
+
import os
|
|
13
|
+
import signal
|
|
14
|
+
import sys
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
17
|
+
import uvicorn
|
|
18
|
+
|
|
19
|
+
from . import MemoryEngine
|
|
20
|
+
from .api import create_app
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Disable tokenizers parallelism to avoid warnings
|
|
24
|
+
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
25
|
+
|
|
26
|
+
# Global reference for cleanup
|
|
27
|
+
_memory: Optional[MemoryEngine] = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _cleanup():
    """Stop the pg0 instance owned by the global MemoryEngine, at most once.

    This function is registered with ``atexit`` and is also called from
    ``_signal_handler`` right before ``sys.exit(0)``, so a signal-driven
    shutdown reaches it twice.  To keep the second call a no-op, the global
    ``_memory`` reference is cleared once pg0 has been stopped successfully.
    If stopping fails, the reference is kept so a later call can retry.

    Never raises: any failure is reported on stdout instead.
    """
    global _memory
    if _memory is None or _memory._pg0 is None:
        # Nothing was started, or cleanup already ran.
        return

    try:
        # A dedicated loop is used because this runs from synchronous
        # exit paths where no event loop is available to await on.
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(_memory._pg0.stop())
        finally:
            # Always release the loop, even when stop() raised.
            loop.close()
    except Exception as e:
        print(f"\nError stopping pg0: {e}")
    else:
        # Mark cleanup as done so the atexit/signal double call is harmless.
        _memory = None
        print("\npg0 stopped.")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _signal_handler(signum, frame):
    """React to SIGINT/SIGTERM: announce it, run cleanup, then exit with status 0.

    Args:
        signum: Number of the signal that was delivered.
        frame: Interrupted stack frame (unused).
    """
    notice = f"\nReceived signal {signum}, shutting down..."
    print(notice)
    _cleanup()
    # Equivalent to sys.exit(0), which simply raises SystemExit(0).
    raise SystemExit(0)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _redact_db_url(db_url):
    """Return *db_url* with any password in its URL userinfo replaced by ``***``.

    Values without an embedded password (including the ``pg0`` default) are
    returned unchanged.  A value that cannot be parsed as a URL is replaced
    by a placeholder rather than echoed back.
    """
    # Local import so this block does not depend on the module's import list.
    from urllib.parse import urlsplit, urlunsplit

    try:
        parts = urlsplit(db_url)
    except ValueError:
        return "<unparseable database URL>"
    if parts.password is None:
        return db_url
    # netloc looks like "user:password@host:port"; keep everything but the password.
    userinfo, _, hostinfo = parts.netloc.rpartition("@")
    username = userinfo.split(":", 1)[0]
    return urlunsplit(parts._replace(netloc=f"{username}:***@{hostinfo}"))


def main():
    """Main entry point for the CLI.

    Parses ``--host``, ``--port``, ``--log-level`` and ``--access-log``,
    registers the cleanup handlers, builds a ``MemoryEngine`` from the
    ``HINDSIGHT_API_*`` environment variables, creates the app via
    ``create_app`` and serves it with ``uvicorn.run``.

    Environment variables read (with defaults):
        HINDSIGHT_API_DATABASE_URL  ("pg0")
        HINDSIGHT_API_LLM_PROVIDER  ("groq")
        HINDSIGHT_API_LLM_API_KEY   ("")
        HINDSIGHT_API_LLM_MODEL     ("openai/gpt-oss-20b")
        HINDSIGHT_API_LLM_BASE_URL  (unset or empty -> None)
    """
    global _memory

    parser = argparse.ArgumentParser(
        prog="hindsight-api",
        description="Hindsight API Server",
    )
    parser.add_argument(
        "--host", default="0.0.0.0",
        help="Host to bind to (default: 0.0.0.0)"
    )
    parser.add_argument(
        "--port", type=int, default=8888,
        help="Port to bind to (default: 8888)"
    )
    parser.add_argument(
        "--log-level", default="info",
        choices=["critical", "error", "warning", "info", "debug", "trace"],
        help="Log level (default: info)"
    )
    parser.add_argument(
        "--access-log", action="store_true",
        help="Enable access log"
    )

    args = parser.parse_args()

    # Register cleanup handlers
    # NOTE(review): uvicorn appears to install its own SIGINT/SIGTERM handling
    # while serving, so these two handlers may only be in effect outside
    # uvicorn.run(); the atexit hook covers the normal exit path — confirm.
    atexit.register(_cleanup)
    signal.signal(signal.SIGINT, _signal_handler)
    signal.signal(signal.SIGTERM, _signal_handler)

    # Get configuration from environment variables
    db_url = os.getenv("HINDSIGHT_API_DATABASE_URL", "pg0")
    llm_provider = os.getenv("HINDSIGHT_API_LLM_PROVIDER", "groq")
    llm_api_key = os.getenv("HINDSIGHT_API_LLM_API_KEY", "")
    llm_model = os.getenv("HINDSIGHT_API_LLM_MODEL", "openai/gpt-oss-20b")
    # An empty value is treated the same as an unset one.
    llm_base_url = os.getenv("HINDSIGHT_API_LLM_BASE_URL") or None

    # Create MemoryEngine (kept in the module global so _cleanup can reach it)
    _memory = MemoryEngine(
        db_url=db_url,
        memory_llm_provider=llm_provider,
        memory_llm_api_key=llm_api_key,
        memory_llm_model=llm_model,
        memory_llm_base_url=llm_base_url,
    )

    # Create FastAPI app
    app = create_app(
        memory=_memory,
        http_api_enabled=True,
        mcp_api_enabled=True,
        mcp_mount_path="/mcp",
        run_migrations=True,
        initialize_memory=True,
    )

    # Prepare uvicorn config
    uvicorn_config = {
        "app": app,
        "host": args.host,
        "port": args.port,
        "log_level": args.log_level,
        "access_log": args.access_log,
    }

    print("\nStarting Hindsight API...")
    print(f"  URL: http://{args.host}:{args.port}")
    # Never echo credentials: a database URL may carry a password.
    print(f"  Database: {_redact_db_url(db_url)}")
    print(f"  LLM Provider: {llm_provider}")
    print()

    uvicorn.run(**uvicorn_config)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
if __name__ == "__main__":
|
|
128
|
+
main()
|
|
@@ -16,6 +16,24 @@ from pydantic import BaseModel, Field, field_validator, ConfigDict
|
|
|
16
16
|
from ..llm_wrapper import OutputTooLongError, LLMConfig
|
|
17
17
|
|
|
18
18
|
|
|
19
|
+
def _sanitize_text(text: str) -> str:
    """
    Sanitize text so it can always be encoded as UTF-8.

    Surrogate code points (U+D800 to U+DFFF) are used in UTF-16 encoding
    but cannot be encoded in UTF-8. They can appear in Python strings
    from improperly decoded data (e.g., from JavaScript or broken files).

    A high surrogate immediately followed by a low surrogate is a valid
    UTF-16 pair and is recombined into the single character it represents,
    so content such as emoji is preserved. Any remaining (unpaired)
    surrogate is removed to prevent UnicodeEncodeError when the text is
    sent to the LLM API.

    Args:
        text: The text to sanitize. Falsy values are returned unchanged.

    Returns:
        The text with valid surrogate pairs merged and lone surrogates removed.
    """
    if not text:
        return text

    def _merge_pair(match):
        # Standard UTF-16 decoding of a high/low surrogate pair.
        high, low = match.group()
        return chr(0x10000 + ((ord(high) - 0xD800) << 10) + (ord(low) - 0xDC00))

    # First rescue well-formed pairs, then drop whatever surrogates are left.
    merged = re.sub(r'[\ud800-\udbff][\udc00-\udfff]', _merge_pair, text)
    return re.sub(r'[\ud800-\udfff]', '', merged)
|
|
35
|
+
|
|
36
|
+
|
|
19
37
|
class Entity(BaseModel):
|
|
20
38
|
"""An entity extracted from text."""
|
|
21
39
|
text: str = Field(
|
|
@@ -470,6 +488,10 @@ WHAT TO EXTRACT vs SKIP
|
|
|
470
488
|
max_retries = 2
|
|
471
489
|
last_error = None
|
|
472
490
|
|
|
491
|
+
# Sanitize input text to prevent Unicode encoding errors (e.g., unpaired surrogates)
|
|
492
|
+
sanitized_chunk = _sanitize_text(chunk)
|
|
493
|
+
sanitized_context = _sanitize_text(context) if context else 'none'
|
|
494
|
+
|
|
473
495
|
# Build user message with metadata and chunk content in a clear format
|
|
474
496
|
# Format event_date with day of week for better temporal reasoning
|
|
475
497
|
event_date_formatted = event_date.strftime('%A, %B %d, %Y') # e.g., "Monday, June 10, 2024"
|
|
@@ -477,10 +499,10 @@ WHAT TO EXTRACT vs SKIP
|
|
|
477
499
|
|
|
478
500
|
Chunk: {chunk_index + 1}/{total_chunks}
|
|
479
501
|
Event Date: {event_date_formatted} ({event_date.isoformat()})
|
|
480
|
-
Context: {
|
|
502
|
+
Context: {sanitized_context}
|
|
481
503
|
|
|
482
504
|
Text:
|
|
483
|
-
{
|
|
505
|
+
{sanitized_chunk}"""
|
|
484
506
|
|
|
485
507
|
for attempt in range(max_retries):
|
|
486
508
|
try:
|
hindsight_api/web/server.py
CHANGED
|
@@ -85,7 +85,7 @@ if __name__ == "__main__":
|
|
|
85
85
|
env_log_level = "info"
|
|
86
86
|
|
|
87
87
|
# Parse CLI arguments
|
|
88
|
-
parser = argparse.ArgumentParser(description="
|
|
88
|
+
parser = argparse.ArgumentParser(description="Hindsight API Server")
|
|
89
89
|
parser.add_argument("--host", default="0.0.0.0", help="Host to bind to (default: 0.0.0.0)")
|
|
90
90
|
parser.add_argument("--port", type=int, default=8888, help="Port to bind to (default: 8888)")
|
|
91
91
|
parser.add_argument("--reload", action="store_true", help="Enable auto-reload on code changes")
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
hindsight_api/__init__.py,sha256=yQWYWUWEhvs1OY1coENhZV_CuOAWmN_YKZXQMIvGN94,851
|
|
2
|
+
hindsight_api/cli.py,sha256=-dxAHsET_pHd6NlA3ufI4KEKQA3fL3YapCvDB_x2ax8,3303
|
|
2
3
|
hindsight_api/metrics.py,sha256=j4-eeqVjjcGQxAxS_GgEaBNm10KdUxrGS_I2d1IM1hY,7255
|
|
3
4
|
hindsight_api/migrations.py,sha256=VY-ILJLWEY1IaeJgQ2jlAVUtPLzq_41Dytg_DjuF0GA,6402
|
|
4
5
|
hindsight_api/models.py,sha256=1vMn9jmDQvohfmxZXr1SYnhz5vhz52nrTd93A_lkVNE,12606
|
|
@@ -24,7 +25,7 @@ hindsight_api/engine/retain/deduplication.py,sha256=9YXgVI_m1Mtz5Cv46ZceCEs0GwpL
|
|
|
24
25
|
hindsight_api/engine/retain/embedding_processing.py,sha256=cHTt3rPvDCWBWVPfSeg6bwH8HoXYGmP4bvS21boNONI,1734
|
|
25
26
|
hindsight_api/engine/retain/embedding_utils.py,sha256=Q24h_iw6pRAW2vDWPvauWY1o3bXLzW3eWvSxDALDiE0,1588
|
|
26
27
|
hindsight_api/engine/retain/entity_processing.py,sha256=meHOjsFzdvh1tbe6YlTofhcUs2Y6TcAN3S-0EKOvFP0,2705
|
|
27
|
-
hindsight_api/engine/retain/fact_extraction.py,sha256=
|
|
28
|
+
hindsight_api/engine/retain/fact_extraction.py,sha256=vOIlag9rJ8_8Q-TfOhMY88PeJpUyFIp0i7vdEyzbJLY,46125
|
|
28
29
|
hindsight_api/engine/retain/fact_storage.py,sha256=gRRQf_FCLsj5lUvdlOaxJsS5JosM6IhO_pik8Ur8VFg,5717
|
|
29
30
|
hindsight_api/engine/retain/link_creation.py,sha256=XJx7U3HboJLHtGgt_tHGsCa58lGo2ZyywzMNosrY9Xc,3154
|
|
30
31
|
hindsight_api/engine/retain/link_utils.py,sha256=PAXalIhAPZGcJv8EugcpwNgoWZ2D_ciVU3brHL-m090,26226
|
|
@@ -42,7 +43,8 @@ hindsight_api/engine/search/trace.py,sha256=GT86_LVKMyG2mw6EJzPjafvbqaot6XVy5fZ0
|
|
|
42
43
|
hindsight_api/engine/search/tracer.py,sha256=mcM9qZpj3YFudrBCESwc6YKNAiWIMx1lScXWn5ru-ok,15017
|
|
43
44
|
hindsight_api/engine/search/types.py,sha256=qIeHW_gT7f291vteTZXygAM8oAaPp2dq6uEdvOyOwzs,5488
|
|
44
45
|
hindsight_api/web/__init__.py,sha256=WABqyqiAVFJJWOhKCytkj5Vcb61eAsRib3Ek7IMX6_U,378
|
|
45
|
-
hindsight_api/web/server.py,sha256=
|
|
46
|
-
hindsight_api-0.0.
|
|
47
|
-
hindsight_api-0.0.
|
|
48
|
-
hindsight_api-0.0.
|
|
46
|
+
hindsight_api/web/server.py,sha256=txP1OBiwxZTyDbPLUYZ9XPMysskxEceHlxbDEvIq0ok,5376
|
|
47
|
+
hindsight_api-0.0.16.dist-info/METADATA,sha256=0Tg86KkYkFYEBFBGjUhSE-RwllDeQ5n30vctTeK2DFk,1496
|
|
48
|
+
hindsight_api-0.0.16.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
49
|
+
hindsight_api-0.0.16.dist-info/entry_points.txt,sha256=53Fn-VxtkqreZhOPTJB_FupH7e5GyiMY3gzEp22d8xs,57
|
|
50
|
+
hindsight_api-0.0.16.dist-info/RECORD,,
|
|
File without changes
|