hindsight-api 0.0.14__py3-none-any.whl → 0.0.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,24 @@ from pydantic import BaseModel, Field, field_validator, ConfigDict
16
16
  from ..llm_wrapper import OutputTooLongError, LLMConfig
17
17
 
18
18
 
19
+ def _sanitize_text(text: str) -> str:
20
+ """
21
+ Sanitize text by removing invalid Unicode surrogate characters.
22
+
23
+ Surrogate characters (U+D800 to U+DFFF) are used in UTF-16 encoding
24
+ but cannot be encoded in UTF-8. They can appear in Python strings
25
+ from improperly decoded data (e.g., from JavaScript or broken files).
26
+
27
+ This function removes unpaired surrogates to prevent UnicodeEncodeError
28
+ when the text is sent to the LLM API.
29
+ """
30
+ if not text:
31
+ return text
32
+ # Remove surrogate characters (U+D800 to U+DFFF) using regex
33
+ # These are invalid in UTF-8 and cause encoding errors
34
+ return re.sub(r'[\ud800-\udfff]', '', text)
35
+
36
+
19
37
  class Entity(BaseModel):
20
38
  """An entity extracted from text."""
21
39
  text: str = Field(
@@ -470,6 +488,10 @@ WHAT TO EXTRACT vs SKIP
470
488
  max_retries = 2
471
489
  last_error = None
472
490
 
491
+ # Sanitize input text to prevent Unicode encoding errors (e.g., unpaired surrogates)
492
+ sanitized_chunk = _sanitize_text(chunk)
493
+ sanitized_context = _sanitize_text(context) if context else 'none'
494
+
473
495
  # Build user message with metadata and chunk content in a clear format
474
496
  # Format event_date with day of week for better temporal reasoning
475
497
  event_date_formatted = event_date.strftime('%A, %B %d, %Y') # e.g., "Monday, June 10, 2024"
@@ -477,10 +499,10 @@ WHAT TO EXTRACT vs SKIP
477
499
 
478
500
  Chunk: {chunk_index + 1}/{total_chunks}
479
501
  Event Date: {event_date_formatted} ({event_date.isoformat()})
480
- Context: {context if context else 'none'}
502
+ Context: {sanitized_context}
481
503
 
482
504
  Text:
483
- {chunk}"""
505
+ {sanitized_chunk}"""
484
506
 
485
507
  for attempt in range(max_retries):
486
508
  try:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hindsight-api
3
- Version: 0.0.14
3
+ Version: 0.0.15
4
4
  Summary: Temporal + Semantic + Entity Memory System for AI agents using PostgreSQL
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: alembic>=1.17.1
@@ -24,7 +24,7 @@ hindsight_api/engine/retain/deduplication.py,sha256=9YXgVI_m1Mtz5Cv46ZceCEs0GwpL
24
24
  hindsight_api/engine/retain/embedding_processing.py,sha256=cHTt3rPvDCWBWVPfSeg6bwH8HoXYGmP4bvS21boNONI,1734
25
25
  hindsight_api/engine/retain/embedding_utils.py,sha256=Q24h_iw6pRAW2vDWPvauWY1o3bXLzW3eWvSxDALDiE0,1588
26
26
  hindsight_api/engine/retain/entity_processing.py,sha256=meHOjsFzdvh1tbe6YlTofhcUs2Y6TcAN3S-0EKOvFP0,2705
27
- hindsight_api/engine/retain/fact_extraction.py,sha256=D7nnDn7U0UhsAwbo9qahPSpGxiRP-L5tdHT1JAIKM44,45254
27
+ hindsight_api/engine/retain/fact_extraction.py,sha256=vOIlag9rJ8_8Q-TfOhMY88PeJpUyFIp0i7vdEyzbJLY,46125
28
28
  hindsight_api/engine/retain/fact_storage.py,sha256=gRRQf_FCLsj5lUvdlOaxJsS5JosM6IhO_pik8Ur8VFg,5717
29
29
  hindsight_api/engine/retain/link_creation.py,sha256=XJx7U3HboJLHtGgt_tHGsCa58lGo2ZyywzMNosrY9Xc,3154
30
30
  hindsight_api/engine/retain/link_utils.py,sha256=PAXalIhAPZGcJv8EugcpwNgoWZ2D_ciVU3brHL-m090,26226
@@ -43,6 +43,6 @@ hindsight_api/engine/search/tracer.py,sha256=mcM9qZpj3YFudrBCESwc6YKNAiWIMx1lScX
43
43
  hindsight_api/engine/search/types.py,sha256=qIeHW_gT7f291vteTZXygAM8oAaPp2dq6uEdvOyOwzs,5488
44
44
  hindsight_api/web/__init__.py,sha256=WABqyqiAVFJJWOhKCytkj5Vcb61eAsRib3Ek7IMX6_U,378
45
45
  hindsight_api/web/server.py,sha256=oPNJ_z4DO38MdK7Juyh2LdH0ipZ_BQF48cUM-4B_Uw0,5379
46
- hindsight_api-0.0.14.dist-info/METADATA,sha256=FFYSGyrGsxR_vYmM11h2AK4pqC8MdbOjjV46yJ0mlL8,1496
47
- hindsight_api-0.0.14.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
48
- hindsight_api-0.0.14.dist-info/RECORD,,
46
+ hindsight_api-0.0.15.dist-info/METADATA,sha256=bXvuwqAtqB2mCaDUCkiJ8t5tw085gMLWDLsGhFHF9Bo,1496
47
+ hindsight_api-0.0.15.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
48
+ hindsight_api-0.0.15.dist-info/RECORD,,