npcpy 1.0.26__py3-none-any.whl → 1.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. npcpy/__init__.py +0 -7
  2. npcpy/data/audio.py +16 -99
  3. npcpy/data/image.py +43 -42
  4. npcpy/data/load.py +83 -124
  5. npcpy/data/text.py +28 -28
  6. npcpy/data/video.py +8 -32
  7. npcpy/data/web.py +51 -23
  8. npcpy/ft/diff.py +110 -0
  9. npcpy/ft/ge.py +115 -0
  10. npcpy/ft/memory_trainer.py +171 -0
  11. npcpy/ft/model_ensembler.py +357 -0
  12. npcpy/ft/rl.py +360 -0
  13. npcpy/ft/sft.py +248 -0
  14. npcpy/ft/usft.py +128 -0
  15. npcpy/gen/audio_gen.py +24 -0
  16. npcpy/gen/embeddings.py +13 -13
  17. npcpy/gen/image_gen.py +262 -117
  18. npcpy/gen/response.py +615 -415
  19. npcpy/gen/video_gen.py +53 -7
  20. npcpy/llm_funcs.py +1869 -437
  21. npcpy/main.py +1 -1
  22. npcpy/memory/command_history.py +844 -510
  23. npcpy/memory/kg_vis.py +833 -0
  24. npcpy/memory/knowledge_graph.py +892 -1845
  25. npcpy/memory/memory_processor.py +81 -0
  26. npcpy/memory/search.py +188 -90
  27. npcpy/mix/debate.py +192 -3
  28. npcpy/npc_compiler.py +1672 -801
  29. npcpy/npc_sysenv.py +593 -1266
  30. npcpy/serve.py +3120 -0
  31. npcpy/sql/ai_function_tools.py +257 -0
  32. npcpy/sql/database_ai_adapters.py +186 -0
  33. npcpy/sql/database_ai_functions.py +163 -0
  34. npcpy/sql/model_runner.py +19 -19
  35. npcpy/sql/npcsql.py +706 -507
  36. npcpy/sql/sql_model_compiler.py +156 -0
  37. npcpy/tools.py +183 -0
  38. npcpy/work/plan.py +13 -279
  39. npcpy/work/trigger.py +3 -3
  40. npcpy-1.2.32.dist-info/METADATA +803 -0
  41. npcpy-1.2.32.dist-info/RECORD +54 -0
  42. npcpy/data/dataframes.py +0 -171
  43. npcpy/memory/deep_research.py +0 -125
  44. npcpy/memory/sleep.py +0 -557
  45. npcpy/modes/_state.py +0 -78
  46. npcpy/modes/alicanto.py +0 -1075
  47. npcpy/modes/guac.py +0 -785
  48. npcpy/modes/mcp_npcsh.py +0 -822
  49. npcpy/modes/npc.py +0 -213
  50. npcpy/modes/npcsh.py +0 -1158
  51. npcpy/modes/plonk.py +0 -409
  52. npcpy/modes/pti.py +0 -234
  53. npcpy/modes/serve.py +0 -1637
  54. npcpy/modes/spool.py +0 -312
  55. npcpy/modes/wander.py +0 -549
  56. npcpy/modes/yap.py +0 -572
  57. npcpy/npc_team/alicanto.npc +0 -2
  58. npcpy/npc_team/alicanto.png +0 -0
  59. npcpy/npc_team/assembly_lines/test_pipeline.py +0 -181
  60. npcpy/npc_team/corca.npc +0 -13
  61. npcpy/npc_team/foreman.npc +0 -7
  62. npcpy/npc_team/frederic.npc +0 -6
  63. npcpy/npc_team/frederic4.png +0 -0
  64. npcpy/npc_team/guac.png +0 -0
  65. npcpy/npc_team/jinxs/automator.jinx +0 -18
  66. npcpy/npc_team/jinxs/bash_executer.jinx +0 -31
  67. npcpy/npc_team/jinxs/calculator.jinx +0 -11
  68. npcpy/npc_team/jinxs/edit_file.jinx +0 -96
  69. npcpy/npc_team/jinxs/file_chat.jinx +0 -14
  70. npcpy/npc_team/jinxs/gui_controller.jinx +0 -28
  71. npcpy/npc_team/jinxs/image_generation.jinx +0 -29
  72. npcpy/npc_team/jinxs/internet_search.jinx +0 -30
  73. npcpy/npc_team/jinxs/local_search.jinx +0 -152
  74. npcpy/npc_team/jinxs/npcsh_executor.jinx +0 -31
  75. npcpy/npc_team/jinxs/python_executor.jinx +0 -8
  76. npcpy/npc_team/jinxs/screen_cap.jinx +0 -25
  77. npcpy/npc_team/jinxs/sql_executor.jinx +0 -33
  78. npcpy/npc_team/kadiefa.npc +0 -3
  79. npcpy/npc_team/kadiefa.png +0 -0
  80. npcpy/npc_team/npcsh.ctx +0 -9
  81. npcpy/npc_team/npcsh_sibiji.png +0 -0
  82. npcpy/npc_team/plonk.npc +0 -2
  83. npcpy/npc_team/plonk.png +0 -0
  84. npcpy/npc_team/plonkjr.npc +0 -2
  85. npcpy/npc_team/plonkjr.png +0 -0
  86. npcpy/npc_team/sibiji.npc +0 -5
  87. npcpy/npc_team/sibiji.png +0 -0
  88. npcpy/npc_team/spool.png +0 -0
  89. npcpy/npc_team/templates/analytics/celona.npc +0 -0
  90. npcpy/npc_team/templates/hr_support/raone.npc +0 -0
  91. npcpy/npc_team/templates/humanities/eriane.npc +0 -4
  92. npcpy/npc_team/templates/it_support/lineru.npc +0 -0
  93. npcpy/npc_team/templates/marketing/slean.npc +0 -4
  94. npcpy/npc_team/templates/philosophy/maurawa.npc +0 -0
  95. npcpy/npc_team/templates/sales/turnic.npc +0 -4
  96. npcpy/npc_team/templates/software/welxor.npc +0 -0
  97. npcpy/npc_team/yap.png +0 -0
  98. npcpy/routes.py +0 -958
  99. npcpy/work/mcp_helpers.py +0 -357
  100. npcpy/work/mcp_server.py +0 -194
  101. npcpy-1.0.26.data/data/npcpy/npc_team/alicanto.npc +0 -2
  102. npcpy-1.0.26.data/data/npcpy/npc_team/alicanto.png +0 -0
  103. npcpy-1.0.26.data/data/npcpy/npc_team/automator.jinx +0 -18
  104. npcpy-1.0.26.data/data/npcpy/npc_team/bash_executer.jinx +0 -31
  105. npcpy-1.0.26.data/data/npcpy/npc_team/calculator.jinx +0 -11
  106. npcpy-1.0.26.data/data/npcpy/npc_team/celona.npc +0 -0
  107. npcpy-1.0.26.data/data/npcpy/npc_team/corca.npc +0 -13
  108. npcpy-1.0.26.data/data/npcpy/npc_team/edit_file.jinx +0 -96
  109. npcpy-1.0.26.data/data/npcpy/npc_team/eriane.npc +0 -4
  110. npcpy-1.0.26.data/data/npcpy/npc_team/file_chat.jinx +0 -14
  111. npcpy-1.0.26.data/data/npcpy/npc_team/foreman.npc +0 -7
  112. npcpy-1.0.26.data/data/npcpy/npc_team/frederic.npc +0 -6
  113. npcpy-1.0.26.data/data/npcpy/npc_team/frederic4.png +0 -0
  114. npcpy-1.0.26.data/data/npcpy/npc_team/guac.png +0 -0
  115. npcpy-1.0.26.data/data/npcpy/npc_team/gui_controller.jinx +0 -28
  116. npcpy-1.0.26.data/data/npcpy/npc_team/image_generation.jinx +0 -29
  117. npcpy-1.0.26.data/data/npcpy/npc_team/internet_search.jinx +0 -30
  118. npcpy-1.0.26.data/data/npcpy/npc_team/kadiefa.npc +0 -3
  119. npcpy-1.0.26.data/data/npcpy/npc_team/kadiefa.png +0 -0
  120. npcpy-1.0.26.data/data/npcpy/npc_team/lineru.npc +0 -0
  121. npcpy-1.0.26.data/data/npcpy/npc_team/local_search.jinx +0 -152
  122. npcpy-1.0.26.data/data/npcpy/npc_team/maurawa.npc +0 -0
  123. npcpy-1.0.26.data/data/npcpy/npc_team/npcsh.ctx +0 -9
  124. npcpy-1.0.26.data/data/npcpy/npc_team/npcsh_executor.jinx +0 -31
  125. npcpy-1.0.26.data/data/npcpy/npc_team/npcsh_sibiji.png +0 -0
  126. npcpy-1.0.26.data/data/npcpy/npc_team/plonk.npc +0 -2
  127. npcpy-1.0.26.data/data/npcpy/npc_team/plonk.png +0 -0
  128. npcpy-1.0.26.data/data/npcpy/npc_team/plonkjr.npc +0 -2
  129. npcpy-1.0.26.data/data/npcpy/npc_team/plonkjr.png +0 -0
  130. npcpy-1.0.26.data/data/npcpy/npc_team/python_executor.jinx +0 -8
  131. npcpy-1.0.26.data/data/npcpy/npc_team/raone.npc +0 -0
  132. npcpy-1.0.26.data/data/npcpy/npc_team/screen_cap.jinx +0 -25
  133. npcpy-1.0.26.data/data/npcpy/npc_team/sibiji.npc +0 -5
  134. npcpy-1.0.26.data/data/npcpy/npc_team/sibiji.png +0 -0
  135. npcpy-1.0.26.data/data/npcpy/npc_team/slean.npc +0 -4
  136. npcpy-1.0.26.data/data/npcpy/npc_team/spool.png +0 -0
  137. npcpy-1.0.26.data/data/npcpy/npc_team/sql_executor.jinx +0 -33
  138. npcpy-1.0.26.data/data/npcpy/npc_team/test_pipeline.py +0 -181
  139. npcpy-1.0.26.data/data/npcpy/npc_team/turnic.npc +0 -4
  140. npcpy-1.0.26.data/data/npcpy/npc_team/welxor.npc +0 -0
  141. npcpy-1.0.26.data/data/npcpy/npc_team/yap.png +0 -0
  142. npcpy-1.0.26.dist-info/METADATA +0 -827
  143. npcpy-1.0.26.dist-info/RECORD +0 -139
  144. npcpy-1.0.26.dist-info/entry_points.txt +0 -11
  145. /npcpy/{modes → ft}/__init__.py +0 -0
  146. {npcpy-1.0.26.dist-info → npcpy-1.2.32.dist-info}/WHEEL +0 -0
  147. {npcpy-1.0.26.dist-info → npcpy-1.2.32.dist-info}/licenses/LICENSE +0 -0
  148. {npcpy-1.0.26.dist-info → npcpy-1.2.32.dist-info}/top_level.txt +0 -0
npcpy/__init__.py CHANGED
@@ -1,13 +1,6 @@
 from . import npc_compiler
 from . import npc_sysenv
-from . import routes
 from . import llm_funcs
-from . import modes
-try:
-    from . import npcs
-except ImportError:
-    pass
 from . import sql
 from . import work
-
 from . import gen
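A minimal sketch of the new import surface, based only on the hunk above: `import npcpy` no longer pulls in `npcpy.routes`, `npcpy.modes`, or the optional `npcs` import, while the remaining submodules still load. The check below is illustrative, not part of the package.

```python
# Sketch: confirm which submodules `import npcpy` still exposes in 1.2.32,
# per the __init__.py hunk above (routes/modes are gone, the rest remain).
import npcpy

kept = ["npc_compiler", "npc_sysenv", "llm_funcs", "sql", "work", "gen"]
removed = ["routes", "modes"]

print("kept:", [name for name in kept if hasattr(npcpy, name)])
print("removed:", [name for name in removed if not hasattr(npcpy, name)])
```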
npcpy/data/audio.py CHANGED
@@ -25,7 +25,7 @@ try:
     RATE = 16000
     CHUNK = 512

-    # State Management
+
     is_speaking = False
     should_stop_speaking = False
     tts_sequence = 0
@@ -35,12 +35,12 @@ try:
     last_speech_time = 0
     running = True

-    # Queues
+
     audio_queue = queue.Queue()
     tts_queue = queue.PriorityQueue()
     cleanup_files = []

-    # Initialize pygame mixer
+
     pygame.mixer.quit()
     pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=512)
 except:
@@ -49,7 +49,7 @@ except:

 def convert_mp3_to_wav(mp3_file, wav_file):
     try:
-        # Ensure the output file doesn't exist before conversion
+
         if os.path.exists(wav_file):
             os.remove(wav_file)

@@ -79,7 +79,7 @@ def convert_mp3_to_wav(mp3_file, wav_file):
         raise


-# Check if FFmpeg is available
+
 def check_ffmpeg():
     try:
         subprocess.run(
@@ -89,38 +89,6 @@ def check_ffmpeg():
     except (subprocess.SubprocessError, FileNotFoundError):
         return False

-# History Management Functions
-def load_history():
-    global history
-    try:
-        if os.path.exists(memory_file):
-            with open(memory_file, "r") as f:
-                history = json.load(f)
-    except Exception as e:
-        print(f"Error loading conversation history: {e}")
-        history = []
-
-
-def save_history():
-    try:
-        with open(memory_file, "w") as f:
-            json.dump(history, f)
-    except Exception as e:
-        print(f"Error saving conversation history: {e}")
-
-
-def add_exchange(user_input, assistant_response):
-    global history
-    exchange = {
-        "user": user_input,
-        "assistant": assistant_response,
-        "timestamp": time.time(),
-    }
-    history.append(exchange)
-    if len(history) > max_history:
-        history.pop(0)
-    save_history()
-

 def get_context_string():
     context = []
@@ -130,7 +98,7 @@ def get_context_string():
     return "\n".join(context)


-# Audio Management Functions
+
 def cleanup_temp_files():
     global cleanup_files
     for file in list(cleanup_files):
@@ -207,7 +175,7 @@ def run_transcription(audio_np):
     return None


-# History Management Functions
+
 def load_history():
     global history
     try:
@@ -248,7 +216,7 @@ def get_context_string():
     return "\n".join(context)


-# Audio Management Functions
+
 def cleanup_temp_files():
     global cleanup_files
     for file in list(cleanup_files):
@@ -287,7 +255,7 @@ def audio_callback(in_data, frame_count, time_info, status):
     return (in_data, pyaudio.paContinue)


-# Text-to-Speech Functions
+
 def play_audio_from_queue():
     global is_speaking, cleanup_files, should_stop_speaking
     next_sequence = 0
@@ -381,7 +349,7 @@ import uuid

 def create_and_queue_audio(text, state):
     """Create and queue audio with state awareness for TTS/recording coordination"""
-    # Set TTS speaking flag
+
     state["tts_is_speaking"] = True

     if not text.strip():
@@ -400,12 +368,12 @@ def create_and_queue_audio(text, state):

         convert_mp3_to_wav(mp3_file, wav_file)

-        # Play audio and wait for completion
+
         play_audio(wav_file, state)
     except Exception as e:
         print(f"Error in TTS process: {e}")
     finally:
-        # Ensure flag is reset even if there's an error
+
         state["tts_is_speaking"] = False
         state["tts_just_finished"] = True

@@ -419,7 +387,7 @@ def create_and_queue_audio(text, state):

 def play_audio(filename, state):
     """Play audio with state awareness for TTS/recording coordination"""
-    CHUNK = 4096  # Increased chunk size
+    CHUNK = 4096

     wf = wave.open(filename, "rb")
     p = pyaudio.PyAudio()
@@ -433,8 +401,8 @@ def play_audio(filename, state):

     data = wf.readframes(CHUNK)

-    # This is blocking until audio is done playing
-    while data and state["running"]:  # Check if system still running
+
+    while data and state["running"]:
         stream.write(data)
         data = wf.readframes(CHUNK)

@@ -448,35 +416,6 @@ def play_audio(filename, state):
         pass


-def select_model():
-    models = [
-        "gpt-4o-mini",
-        "claude-haiku-3-5-latest",
-    ]
-
-    while True:
-        try:
-            choice = input(
-                "\nSelect a model number (or press Enter for default): "
-            ).strip()
-            if not choice:
-                return models[0]["name"]
-
-            choice = int(choice)
-            if 1 <= choice <= len(models):
-                selected_model = models[choice - 1]["name"]
-                print(f"Selected model: {selected_model}")
-                return selected_model
-            else:
-                print(f"Please enter a number between 1 and {len(models)}")
-        except ValueError:
-            print("Please enter a valid number")
-        except Exception as e:
-            print(f"Error selecting model: {str(e)}")
-            if models:
-                return models[0]["name"]
-            return "gemma:2b"
-

 def process_response_chunk(text_chunk):
     if not text_chunk.strip():
@@ -486,32 +425,10 @@ def process_response_chunk(text_chunk):


 def process_text_for_tts(text):
-    text = re.sub(r"[*<>{}()\[\]&%#@^_=+~]", "", text)
+    text = re.sub(r"[*<>{}()\[\]&%")
     text = text.strip()
     text = re.sub(r"(\w)\.(\w)\.", r"\1 \2 ", text)
     text = re.sub(r"([.!?])(\w)", r"\1 \2", text)
     return text


-"""
-
-To use this code, you'll need to have the following dependencies installed:
-
-```bash
-pip install numpy torch torchaudio faster-whisper pygame pyaudio gtts ollama
-```
-
-And optionally FFmpeg for audio speed adjustment:
-```bash
-# On Ubuntu/Debian
-sudo apt-get install ffmpeg
-
-# On MacOS with Homebrew
-brew install ffmpeg
-
-# On Windows with Chocolatey
-choco install ffmpeg
-```
-
-
-"""
npcpy/data/image.py CHANGED
@@ -11,25 +11,25 @@ from PIL import Image
 def _windows_snip_to_file(file_path: str) -> bool:
     """Helper function to trigger Windows snipping and save to file."""
     try:
-        # Import Windows-specific modules only when needed
+
         import win32clipboard
         from PIL import ImageGrab
         from ctypes import windll

-        # Simulate Windows + Shift + S
-        windll.user32.keybd_event(0x5B, 0, 0, 0)  # WIN down
-        windll.user32.keybd_event(0x10, 0, 0, 0)  # SHIFT down
-        windll.user32.keybd_event(0x53, 0, 0, 0)  # S down
-        windll.user32.keybd_event(0x53, 0, 0x0002, 0)  # S up
-        windll.user32.keybd_event(0x10, 0, 0x0002, 0)  # SHIFT up
-        windll.user32.keybd_event(0x5B, 0, 0x0002, 0)  # WIN up
+
+        windll.user32.keybd_event(0x5B, 0, 0, 0)
+        windll.user32.keybd_event(0x10, 0, 0, 0)
+        windll.user32.keybd_event(0x53, 0, 0, 0)
+        windll.user32.keybd_event(0x53, 0, 0x0002, 0)
+        windll.user32.keybd_event(0x10, 0, 0x0002, 0)
+        windll.user32.keybd_event(0x5B, 0, 0x0002, 0)

-        # Wait for user to complete the snip
+
         print("Please select an area to capture...")
-        time.sleep(1)  # Give a moment for snipping jinx to start
+        time.sleep(1)

-        # Keep checking clipboard for new image
-        max_wait = 30  # Maximum seconds to wait
+
+        max_wait = 30
         start_time = time.time()

         while time.time() - start_time < max_wait:
@@ -49,36 +49,41 @@ def _windows_snip_to_file(file_path: str) -> bool:
         return False


-def capture_screenshot(npc: Any = None, full=False) -> Dict[str, str]:
+def capture_screenshot( full=False) -> Dict[str, str]:
     """
     Function Description:
         This function captures a screenshot of the current screen and saves it to a file.
     Args:
         npc: The NPC object representing the current NPC.
-        full: Boolean to determine if full screen capture is needed
+        full: Boolean to determine if full screen capture is needed. Default to true.
+        path: Optional path to save the screenshot. Must not use placeholders. Relative paths preferred if the user specifies they want a specific path, otherwise default to None.
     Returns:
         A dictionary containing the filename, file path, and model kwargs.
     """
-    # Ensure the directory exists
+
+
     directory = os.path.expanduser("~/.npcsh/screenshots")
-    os.makedirs(directory, exist_ok=True)
+    timestamp = time.strftime("%Y%m%d_%H%M%S")
+    filename = f"screenshot_{timestamp}.png"

-    # Generate a unique filename
-    filename = f"screenshot_{int(time.time())}.png"
     file_path = os.path.join(directory, filename)
+    os.makedirs(directory, exist_ok=True)
+
+
+
+

     system = platform.system()
+
     model_kwargs = {}

-    if npc is not None:
-        if npc.provider is not None:
-            model_kwargs["provider"] = npc.provider
-        if npc.model is not None:
-            model_kwargs["model"] = npc.model

     if full:
-        if system == "Darwin":
-            subprocess.run(["screencapture", file_path])
+
+        if system.lower() == "darwin":
+
+            subprocess.run(["screencapture", file_path], capture_output=True)
+
         elif system == "Linux":
             if (
                 subprocess.run(
@@ -95,39 +100,35 @@ def capture_screenshot(npc: Any = None, full=False) -> Dict[str, str]:
                 subprocess.Popen(["scrot", file_path])
                 while not os.path.exists(file_path):
                     time.sleep(0.5)
-            else:
-                print(
-                    "No supported screenshot jinx found. Please install gnome-screenshot or scrot."
-                )
-                return None
+
         elif system == "Windows":
-            # For full screen on Windows, we'll use a different approach
+
             try:
                 import win32gui
                 import win32ui
                 import win32con
                 from PIL import Image

-                # Get screen dimensions
+
                 width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN)
                 height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN)

-                # Create device context
+
                 hdesktop = win32gui.GetDesktopWindow()
                 desktop_dc = win32gui.GetWindowDC(hdesktop)
                 img_dc = win32ui.CreateDCFromHandle(desktop_dc)
                 mem_dc = img_dc.CreateCompatibleDC()

-                # Create bitmap
+
                 screenshot = win32ui.CreateBitmap()
                 screenshot.CreateCompatibleBitmap(img_dc, width, height)
                 mem_dc.SelectObject(screenshot)
                 mem_dc.BitBlt((0, 0), (width, height), img_dc, (0, 0), win32con.SRCCOPY)

-                # Save
+
                 screenshot.SaveBitmapFile(mem_dc, file_path)

-                # Cleanup
+
                 mem_dc.DeleteDC()
                 win32gui.DeleteObject(screenshot.GetHandle())

@@ -172,7 +173,7 @@ def capture_screenshot(npc: Any = None, full=False) -> Dict[str, str]:
         print(f"Unsupported operating system: {system}")
         return None

-    # Check if screenshot was successfully saved
+
     if os.path.exists(file_path):
         print(f"Screenshot saved to: {file_path}")
         return {
@@ -185,24 +186,24 @@ def capture_screenshot(npc: Any = None, full=False) -> Dict[str, str]:
         return None

 def compress_image(image_bytes, max_size=(800, 600)):
-    # Create a copy of the bytes in memory
+
     buffer = io.BytesIO(image_bytes)
     img = Image.open(buffer)

-    # Force loading of image data
+
     img.load()

-    # Convert RGBA to RGB if necessary
+
     if img.mode == "RGBA":
         background = Image.new("RGB", img.size, (255, 255, 255))
         background.paste(img, mask=img.split()[3])
         img = background

-    # Resize if needed
+
     if img.size[0] > max_size[0] or img.size[1] > max_size[1]:
         img.thumbnail(max_size)

-    # Save with minimal compression
+
     out_buffer = io.BytesIO()
     img.save(out_buffer, format="JPEG", quality=95, optimize=False)
     return out_buffer.getvalue()
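`capture_screenshot` no longer takes an `npc` argument and now writes timestamped files under `~/.npcsh/screenshots`. A minimal usage sketch; the return-dict key names are inferred from the docstring ("filename, file path, and model kwargs") and should be treated as an assumption.

```python
# Sketch: calling the reworked capture_screenshot() from npcpy/data/image.py.
# The "file_path" key is inferred from the docstring, not confirmed by the diff.
from npcpy.data.image import capture_screenshot

shot = capture_screenshot(full=True)  # full-screen capture on macOS/Linux/Windows
if shot:
    print("saved to:", shot.get("file_path"))
```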
npcpy/data/load.py CHANGED
@@ -1,37 +1,45 @@
-import fitz  # PyMuPDF
+import fitz
 import pandas as pd
 import json
 import io
 from PIL import Image
 import numpy as np
 from typing import Optional
-
 import os

+try:
+    from docx import Document
+except ImportError:
+    Document = None
+
+try:
+    from pptx import Presentation
+except ImportError:
+    Presentation = None
+
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    BeautifulSoup = None
+
 def load_csv(file_path):
     df = pd.read_csv(file_path)
     return df

-
 def load_json(file_path):
-    with open(file_path, "r") as f:
+    with open(file_path, "r", encoding='utf-8') as f:
         data = json.load(f)
-    df = pd.DataFrame(data)
-    return df
-
+    return data

 def load_txt(file_path):
-    with open(file_path, "r") as f:
+    with open(file_path, "r", encoding='utf-8') as f:
         text = f.read()
-    df = pd.DataFrame({"text": [text]})
-    return df
-
+    return text

 def load_excel(file_path):
     df = pd.read_excel(file_path)
     return df

-
 def load_image(file_path):
     img = Image.open(file_path)
     img_array = np.array(img)
@@ -44,45 +52,37 @@ def load_image(file_path):
     )
     return df

-
 def load_pdf(file_path):
     pdf_document = fitz.open(file_path)
-    texts = []
-    images = []
-
-    for page_num, page in enumerate(pdf_document):
-        # Extract text
-        text = page.get_text()
-        texts.append({"page": page_num + 1, "content": text})
-
-        # Extract images
-        image_list = page.get_images(full=True)
-        for img_index, img in enumerate(image_list):
-            xref = img[0]
-            base_image = pdf_document.extract_image(xref)
-            image_bytes = base_image["image"]
-
-            # Convert image to numpy array
-            image = Image.open(io.BytesIO(image_bytes))
-            img_array = np.array(image)
-
-            images.append(
-                {
-                    "page": page_num + 1,
-                    "index": img_index + 1,
-                    "array": img_array.tobytes(),
-                    "shape": img_array.shape,
-                    "dtype": str(img_array.dtype),
-                }
-            )
-
-    # Create DataFrame
-    df = pd.DataFrame(
-        {"texts": json.dumps(texts), "images": json.dumps(images)}, index=[0]
-    )
-
-    return df
-
+    full_text = ""
+    for page in pdf_document:
+        full_text += page.get_text() + "\n"
+    return full_text
+
+def load_docx(file_path):
+    if Document is None:
+        raise ImportError("Please install python-docx to load .docx files.")
+    doc = Document(file_path)
+    full_text = "\n".join([para.text for para in doc.paragraphs])
+    return full_text
+
+def load_pptx(file_path):
+    if Presentation is None:
+        raise ImportError("Please install python-pptx to load .pptx files.")
+    prs = Presentation(file_path)
+    full_text = ""
+    for slide in prs.slides:
+        for shape in slide.shapes:
+            if hasattr(shape, "text"):
+                full_text += shape.text + "\n"
+    return full_text
+
+def load_html(file_path):
+    if BeautifulSoup is None:
+        raise ImportError("Please install beautifulsoup4 to load .html files.")
+    with open(file_path, 'r', encoding='utf-8') as f:
+        soup = BeautifulSoup(f, 'html.parser')
+    return soup.get_text(separator='\n', strip=True)

 extension_map = {
     "PNG": "images",
@@ -96,100 +96,59 @@ extension_map = {
     "WMV": "videos",
     "MPG": "videos",
     "MPEG": "videos",
-    "DOC": "documents",
     "DOCX": "documents",
-    "PDF": "documents",
-    "PPT": "documents",
     "PPTX": "documents",
-    "XLS": "documents",
+    "PDF": "documents",
     "XLSX": "documents",
     "TXT": "documents",
     "CSV": "documents",
+    "MD": "documents",
+    "HTML": "documents",
+    "HTM": "documents",
     "ZIP": "archives",
     "RAR": "archives",
     "7Z": "archives",
     "TAR": "archives",
     "GZ": "archives",
-    "BZ2": "archives",
-    "ISO": "archives",
 }

-
-def load_file_contents(file_path, chunk_size=250):
-    """
-    Load and format the contents of a file based on its extension.
-    Returns a list of chunks from the file content.
-    """
+def load_file_contents(file_path, chunk_size=None):
     file_ext = os.path.splitext(file_path)[1].upper().lstrip('.')
-    chunks = []
-
+    full_content = ""
+    if not isinstance(chunk_size, int):
+        chunk_size=250
     try:
         if file_ext == 'PDF':
-            # Load PDF content
-            pdf_document = fitz.open(file_path)
-            full_text = ""
-
-            # Extract text from each page
-            for page in pdf_document:
-                full_text += page.get_text() + "\n\n"
-
-            # Chunk the text
-            for i in range(0, len(full_text), chunk_size):
-                chunk = full_text[i:i+chunk_size].strip()
-                if chunk:  # Skip empty chunks
-                    chunks.append(chunk)
-
+            full_content = load_pdf(file_path)
+        elif file_ext == 'DOCX':
+            full_content = load_docx(file_path)
+        elif file_ext == 'PPTX':
+            full_content = load_pptx(file_path)
+        elif file_ext in ['HTML', 'HTM']:
+            full_content = load_html(file_path)
         elif file_ext == 'CSV':
-            df = pd.read_csv(file_path)
-            # Add metadata as first chunk
-            meta = f"CSV Columns: {', '.join(df.columns)}\nRows: {len(df)}"
-            chunks.append(meta)
-
-            # Convert sample data to string and chunk it
-            sample = df.head(20).to_string()
-            for i in range(0, len(sample), chunk_size):
-                chunk = sample[i:i+chunk_size].strip()
-                if chunk:
-                    chunks.append(chunk)
-
+            df = load_csv(file_path)
+            full_content = df.to_string()
         elif file_ext in ['XLS', 'XLSX']:
-            df = pd.read_excel(file_path)
-            # Add metadata as first chunk
-            meta = f"Excel Columns: {', '.join(df.columns)}\nRows: {len(df)}"
-            chunks.append(meta)
-
-            # Convert sample data to string and chunk it
-            sample = df.head(20).to_string()
-            for i in range(0, len(sample), chunk_size):
-                chunk = sample[i:i+chunk_size].strip()
-                if chunk:
-                    chunks.append(chunk)
-
-        elif file_ext == 'TXT':
-            with open(file_path, 'r', encoding='utf-8') as f:
-                content = f.read()
-
-            # Chunk the text
-            for i in range(0, len(content), chunk_size):
-                chunk = content[i:i+chunk_size].strip()
-                if chunk:
-                    chunks.append(chunk)
-
+            df = load_excel(file_path)
+            full_content = df.to_string()
+        elif file_ext in ['TXT', 'MD', 'PY', 'JSX', 'TSX', 'TS', 'JS', 'JSON', 'SQL', 'NPC', 'JINX', 'LINE', 'YAML', 'DART', 'JAVA']:
+            full_content = load_txt(file_path)
         elif file_ext == 'JSON':
-            with open(file_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-            content = json.dumps(data, indent=2)
-
-            # Chunk the JSON
-            for i in range(0, len(content), chunk_size):
-                chunk = content[i:i+chunk_size].strip()
-                if chunk:
-                    chunks.append(chunk)
-
+            data = load_json(file_path)
+            full_content = json.dumps(data, indent=2)
         else:
-            chunks.append(f"Unsupported file format: {file_ext}")
-
+            return [f"Unsupported file format for content loading: {file_ext}"]
+
+        if not full_content:
+            return []
+
+        chunks = []
+        for i in range(0, len(full_content), chunk_size):
+            chunk = full_content[i:i+chunk_size].strip()
+            if chunk:
+                chunks.append(chunk)
         return chunks

     except Exception as e:
-        return [f"Error loading file {file_path}: {str(e)}"]
+        return [f"Error loading file {file_path}: {str(e)}"]