npcpy 1.0.26__py3-none-any.whl → 1.2.32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. npcpy/__init__.py +0 -7
  2. npcpy/data/audio.py +16 -99
  3. npcpy/data/image.py +43 -42
  4. npcpy/data/load.py +83 -124
  5. npcpy/data/text.py +28 -28
  6. npcpy/data/video.py +8 -32
  7. npcpy/data/web.py +51 -23
  8. npcpy/ft/diff.py +110 -0
  9. npcpy/ft/ge.py +115 -0
  10. npcpy/ft/memory_trainer.py +171 -0
  11. npcpy/ft/model_ensembler.py +357 -0
  12. npcpy/ft/rl.py +360 -0
  13. npcpy/ft/sft.py +248 -0
  14. npcpy/ft/usft.py +128 -0
  15. npcpy/gen/audio_gen.py +24 -0
  16. npcpy/gen/embeddings.py +13 -13
  17. npcpy/gen/image_gen.py +262 -117
  18. npcpy/gen/response.py +615 -415
  19. npcpy/gen/video_gen.py +53 -7
  20. npcpy/llm_funcs.py +1869 -437
  21. npcpy/main.py +1 -1
  22. npcpy/memory/command_history.py +844 -510
  23. npcpy/memory/kg_vis.py +833 -0
  24. npcpy/memory/knowledge_graph.py +892 -1845
  25. npcpy/memory/memory_processor.py +81 -0
  26. npcpy/memory/search.py +188 -90
  27. npcpy/mix/debate.py +192 -3
  28. npcpy/npc_compiler.py +1672 -801
  29. npcpy/npc_sysenv.py +593 -1266
  30. npcpy/serve.py +3120 -0
  31. npcpy/sql/ai_function_tools.py +257 -0
  32. npcpy/sql/database_ai_adapters.py +186 -0
  33. npcpy/sql/database_ai_functions.py +163 -0
  34. npcpy/sql/model_runner.py +19 -19
  35. npcpy/sql/npcsql.py +706 -507
  36. npcpy/sql/sql_model_compiler.py +156 -0
  37. npcpy/tools.py +183 -0
  38. npcpy/work/plan.py +13 -279
  39. npcpy/work/trigger.py +3 -3
  40. npcpy-1.2.32.dist-info/METADATA +803 -0
  41. npcpy-1.2.32.dist-info/RECORD +54 -0
  42. npcpy/data/dataframes.py +0 -171
  43. npcpy/memory/deep_research.py +0 -125
  44. npcpy/memory/sleep.py +0 -557
  45. npcpy/modes/_state.py +0 -78
  46. npcpy/modes/alicanto.py +0 -1075
  47. npcpy/modes/guac.py +0 -785
  48. npcpy/modes/mcp_npcsh.py +0 -822
  49. npcpy/modes/npc.py +0 -213
  50. npcpy/modes/npcsh.py +0 -1158
  51. npcpy/modes/plonk.py +0 -409
  52. npcpy/modes/pti.py +0 -234
  53. npcpy/modes/serve.py +0 -1637
  54. npcpy/modes/spool.py +0 -312
  55. npcpy/modes/wander.py +0 -549
  56. npcpy/modes/yap.py +0 -572
  57. npcpy/npc_team/alicanto.npc +0 -2
  58. npcpy/npc_team/alicanto.png +0 -0
  59. npcpy/npc_team/assembly_lines/test_pipeline.py +0 -181
  60. npcpy/npc_team/corca.npc +0 -13
  61. npcpy/npc_team/foreman.npc +0 -7
  62. npcpy/npc_team/frederic.npc +0 -6
  63. npcpy/npc_team/frederic4.png +0 -0
  64. npcpy/npc_team/guac.png +0 -0
  65. npcpy/npc_team/jinxs/automator.jinx +0 -18
  66. npcpy/npc_team/jinxs/bash_executer.jinx +0 -31
  67. npcpy/npc_team/jinxs/calculator.jinx +0 -11
  68. npcpy/npc_team/jinxs/edit_file.jinx +0 -96
  69. npcpy/npc_team/jinxs/file_chat.jinx +0 -14
  70. npcpy/npc_team/jinxs/gui_controller.jinx +0 -28
  71. npcpy/npc_team/jinxs/image_generation.jinx +0 -29
  72. npcpy/npc_team/jinxs/internet_search.jinx +0 -30
  73. npcpy/npc_team/jinxs/local_search.jinx +0 -152
  74. npcpy/npc_team/jinxs/npcsh_executor.jinx +0 -31
  75. npcpy/npc_team/jinxs/python_executor.jinx +0 -8
  76. npcpy/npc_team/jinxs/screen_cap.jinx +0 -25
  77. npcpy/npc_team/jinxs/sql_executor.jinx +0 -33
  78. npcpy/npc_team/kadiefa.npc +0 -3
  79. npcpy/npc_team/kadiefa.png +0 -0
  80. npcpy/npc_team/npcsh.ctx +0 -9
  81. npcpy/npc_team/npcsh_sibiji.png +0 -0
  82. npcpy/npc_team/plonk.npc +0 -2
  83. npcpy/npc_team/plonk.png +0 -0
  84. npcpy/npc_team/plonkjr.npc +0 -2
  85. npcpy/npc_team/plonkjr.png +0 -0
  86. npcpy/npc_team/sibiji.npc +0 -5
  87. npcpy/npc_team/sibiji.png +0 -0
  88. npcpy/npc_team/spool.png +0 -0
  89. npcpy/npc_team/templates/analytics/celona.npc +0 -0
  90. npcpy/npc_team/templates/hr_support/raone.npc +0 -0
  91. npcpy/npc_team/templates/humanities/eriane.npc +0 -4
  92. npcpy/npc_team/templates/it_support/lineru.npc +0 -0
  93. npcpy/npc_team/templates/marketing/slean.npc +0 -4
  94. npcpy/npc_team/templates/philosophy/maurawa.npc +0 -0
  95. npcpy/npc_team/templates/sales/turnic.npc +0 -4
  96. npcpy/npc_team/templates/software/welxor.npc +0 -0
  97. npcpy/npc_team/yap.png +0 -0
  98. npcpy/routes.py +0 -958
  99. npcpy/work/mcp_helpers.py +0 -357
  100. npcpy/work/mcp_server.py +0 -194
  101. npcpy-1.0.26.data/data/npcpy/npc_team/alicanto.npc +0 -2
  102. npcpy-1.0.26.data/data/npcpy/npc_team/alicanto.png +0 -0
  103. npcpy-1.0.26.data/data/npcpy/npc_team/automator.jinx +0 -18
  104. npcpy-1.0.26.data/data/npcpy/npc_team/bash_executer.jinx +0 -31
  105. npcpy-1.0.26.data/data/npcpy/npc_team/calculator.jinx +0 -11
  106. npcpy-1.0.26.data/data/npcpy/npc_team/celona.npc +0 -0
  107. npcpy-1.0.26.data/data/npcpy/npc_team/corca.npc +0 -13
  108. npcpy-1.0.26.data/data/npcpy/npc_team/edit_file.jinx +0 -96
  109. npcpy-1.0.26.data/data/npcpy/npc_team/eriane.npc +0 -4
  110. npcpy-1.0.26.data/data/npcpy/npc_team/file_chat.jinx +0 -14
  111. npcpy-1.0.26.data/data/npcpy/npc_team/foreman.npc +0 -7
  112. npcpy-1.0.26.data/data/npcpy/npc_team/frederic.npc +0 -6
  113. npcpy-1.0.26.data/data/npcpy/npc_team/frederic4.png +0 -0
  114. npcpy-1.0.26.data/data/npcpy/npc_team/guac.png +0 -0
  115. npcpy-1.0.26.data/data/npcpy/npc_team/gui_controller.jinx +0 -28
  116. npcpy-1.0.26.data/data/npcpy/npc_team/image_generation.jinx +0 -29
  117. npcpy-1.0.26.data/data/npcpy/npc_team/internet_search.jinx +0 -30
  118. npcpy-1.0.26.data/data/npcpy/npc_team/kadiefa.npc +0 -3
  119. npcpy-1.0.26.data/data/npcpy/npc_team/kadiefa.png +0 -0
  120. npcpy-1.0.26.data/data/npcpy/npc_team/lineru.npc +0 -0
  121. npcpy-1.0.26.data/data/npcpy/npc_team/local_search.jinx +0 -152
  122. npcpy-1.0.26.data/data/npcpy/npc_team/maurawa.npc +0 -0
  123. npcpy-1.0.26.data/data/npcpy/npc_team/npcsh.ctx +0 -9
  124. npcpy-1.0.26.data/data/npcpy/npc_team/npcsh_executor.jinx +0 -31
  125. npcpy-1.0.26.data/data/npcpy/npc_team/npcsh_sibiji.png +0 -0
  126. npcpy-1.0.26.data/data/npcpy/npc_team/plonk.npc +0 -2
  127. npcpy-1.0.26.data/data/npcpy/npc_team/plonk.png +0 -0
  128. npcpy-1.0.26.data/data/npcpy/npc_team/plonkjr.npc +0 -2
  129. npcpy-1.0.26.data/data/npcpy/npc_team/plonkjr.png +0 -0
  130. npcpy-1.0.26.data/data/npcpy/npc_team/python_executor.jinx +0 -8
  131. npcpy-1.0.26.data/data/npcpy/npc_team/raone.npc +0 -0
  132. npcpy-1.0.26.data/data/npcpy/npc_team/screen_cap.jinx +0 -25
  133. npcpy-1.0.26.data/data/npcpy/npc_team/sibiji.npc +0 -5
  134. npcpy-1.0.26.data/data/npcpy/npc_team/sibiji.png +0 -0
  135. npcpy-1.0.26.data/data/npcpy/npc_team/slean.npc +0 -4
  136. npcpy-1.0.26.data/data/npcpy/npc_team/spool.png +0 -0
  137. npcpy-1.0.26.data/data/npcpy/npc_team/sql_executor.jinx +0 -33
  138. npcpy-1.0.26.data/data/npcpy/npc_team/test_pipeline.py +0 -181
  139. npcpy-1.0.26.data/data/npcpy/npc_team/turnic.npc +0 -4
  140. npcpy-1.0.26.data/data/npcpy/npc_team/welxor.npc +0 -0
  141. npcpy-1.0.26.data/data/npcpy/npc_team/yap.png +0 -0
  142. npcpy-1.0.26.dist-info/METADATA +0 -827
  143. npcpy-1.0.26.dist-info/RECORD +0 -139
  144. npcpy-1.0.26.dist-info/entry_points.txt +0 -11
  145. /npcpy/{modes → ft}/__init__.py +0 -0
  146. {npcpy-1.0.26.dist-info → npcpy-1.2.32.dist-info}/WHEEL +0 -0
  147. {npcpy-1.0.26.dist-info → npcpy-1.2.32.dist-info}/licenses/LICENSE +0 -0
  148. {npcpy-1.0.26.dist-info → npcpy-1.2.32.dist-info}/top_level.txt +0 -0
npcpy/__init__.py CHANGED
@@ -1,13 +1,6 @@
 from . import npc_compiler
 from . import npc_sysenv
-from . import routes
 from . import llm_funcs
-from . import modes
-try:
-    from . import npcs
-except ImportError:
-    pass
 from . import sql
 from . import work
-
 from . import gen
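A minimal sketch of the new import surface, based only on the hunk above: `import npcpy` no longer pulls in `npcpy.routes`, `npcpy.modes`, or the optional `npcs` import, while the remaining submodules still load. The check below is illustrative, not part of the package.

```python
# Sketch: confirm which submodules `import npcpy` still exposes in 1.2.32,
# per the __init__.py hunk above (routes/modes are gone, the rest remain).
import npcpy

kept = ["npc_compiler", "npc_sysenv", "llm_funcs", "sql", "work", "gen"]
removed = ["routes", "modes"]

print("kept:", [name for name in kept if hasattr(npcpy, name)])
print("removed:", [name for name in removed if not hasattr(npcpy, name)])
```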
npcpy/data/audio.py CHANGED
@@ -25,7 +25,7 @@ try:
     RATE = 16000
     CHUNK = 512

-    # State Management
+
     is_speaking = False
     should_stop_speaking = False
     tts_sequence = 0
@@ -35,12 +35,12 @@ try:
     last_speech_time = 0
     running = True

-    # Queues
+
     audio_queue = queue.Queue()
     tts_queue = queue.PriorityQueue()
     cleanup_files = []

-    # Initialize pygame mixer
+
     pygame.mixer.quit()
     pygame.mixer.init(frequency=44100, size=-16, channels=2, buffer=512)
 except:
@@ -49,7 +49,7 @@ except:

 def convert_mp3_to_wav(mp3_file, wav_file):
     try:
-        # Ensure the output file doesn't exist before conversion
+
         if os.path.exists(wav_file):
             os.remove(wav_file)

@@ -79,7 +79,7 @@ def convert_mp3_to_wav(mp3_file, wav_file):
         raise


-# Check if FFmpeg is available
+
 def check_ffmpeg():
     try:
         subprocess.run(
@@ -89,38 +89,6 @@ def check_ffmpeg():
     except (subprocess.SubprocessError, FileNotFoundError):
         return False

-# History Management Functions
-def load_history():
-    global history
-    try:
-        if os.path.exists(memory_file):
-            with open(memory_file, "r") as f:
-                history = json.load(f)
-    except Exception as e:
-        print(f"Error loading conversation history: {e}")
-        history = []
-
-
-def save_history():
-    try:
-        with open(memory_file, "w") as f:
-            json.dump(history, f)
-    except Exception as e:
-        print(f"Error saving conversation history: {e}")
-
-
-def add_exchange(user_input, assistant_response):
-    global history
-    exchange = {
-        "user": user_input,
-        "assistant": assistant_response,
-        "timestamp": time.time(),
-    }
-    history.append(exchange)
-    if len(history) > max_history:
-        history.pop(0)
-    save_history()
-

 def get_context_string():
     context = []
@@ -130,7 +98,7 @@ def get_context_string():
     return "\n".join(context)


-# Audio Management Functions
+
 def cleanup_temp_files():
     global cleanup_files
     for file in list(cleanup_files):
@@ -207,7 +175,7 @@ def run_transcription(audio_np):
     return None


-# History Management Functions
+
 def load_history():
     global history
     try:
@@ -248,7 +216,7 @@ def get_context_string():
     return "\n".join(context)


-# Audio Management Functions
+
 def cleanup_temp_files():
     global cleanup_files
     for file in list(cleanup_files):
@@ -287,7 +255,7 @@ def audio_callback(in_data, frame_count, time_info, status):
     return (in_data, pyaudio.paContinue)


-# Text-to-Speech Functions
+
 def play_audio_from_queue():
     global is_speaking, cleanup_files, should_stop_speaking
     next_sequence = 0
@@ -381,7 +349,7 @@ import uuid

 def create_and_queue_audio(text, state):
     """Create and queue audio with state awareness for TTS/recording coordination"""
-    # Set TTS speaking flag
+
     state["tts_is_speaking"] = True

     if not text.strip():
@@ -400,12 +368,12 @@ def create_and_queue_audio(text, state):

         convert_mp3_to_wav(mp3_file, wav_file)

-        # Play audio and wait for completion
+
         play_audio(wav_file, state)
     except Exception as e:
         print(f"Error in TTS process: {e}")
     finally:
-        # Ensure flag is reset even if there's an error
+
         state["tts_is_speaking"] = False
         state["tts_just_finished"] = True

@@ -419,7 +387,7 @@ def create_and_queue_audio(text, state):

 def play_audio(filename, state):
     """Play audio with state awareness for TTS/recording coordination"""
-    CHUNK = 4096  # Increased chunk size
+    CHUNK = 4096

     wf = wave.open(filename, "rb")
     p = pyaudio.PyAudio()
@@ -433,8 +401,8 @@ def play_audio(filename, state):

     data = wf.readframes(CHUNK)

-    # This is blocking until audio is done playing
-    while data and state["running"]:  # Check if system still running
+
+    while data and state["running"]:
         stream.write(data)
         data = wf.readframes(CHUNK)

@@ -448,35 +416,6 @@ def play_audio(filename, state):
         pass


-def select_model():
-    models = [
-        "gpt-4o-mini",
-        "claude-haiku-3-5-latest",
-    ]
-
-    while True:
-        try:
-            choice = input(
-                "\nSelect a model number (or press Enter for default): "
-            ).strip()
-            if not choice:
-                return models[0]["name"]
-
-            choice = int(choice)
-            if 1 <= choice <= len(models):
-                selected_model = models[choice - 1]["name"]
-                print(f"Selected model: {selected_model}")
-                return selected_model
-            else:
-                print(f"Please enter a number between 1 and {len(models)}")
-        except ValueError:
-            print("Please enter a valid number")
-        except Exception as e:
-            print(f"Error selecting model: {str(e)}")
-            if models:
-                return models[0]["name"]
-            return "gemma:2b"
-

 def process_response_chunk(text_chunk):
     if not text_chunk.strip():
@@ -486,32 +425,10 @@ def process_response_chunk(text_chunk):


 def process_text_for_tts(text):
-    text = re.sub(r"[*<>{}()\[\]&%#@^_=+~]", "", text)
+    text = re.sub(r"[*<>{}()\[\]&%")
     text = text.strip()
     text = re.sub(r"(\w)\.(\w)\.", r"\1 \2 ", text)
     text = re.sub(r"([.!?])(\w)", r"\1 \2", text)
     return text


-"""
-
-To use this code, you'll need to have the following dependencies installed:
-
-```bash
-pip install numpy torch torchaudio faster-whisper pygame pyaudio gtts ollama
-```
-
-And optionally FFmpeg for audio speed adjustment:
-```bash
-# On Ubuntu/Debian
-sudo apt-get install ffmpeg
-
-# On MacOS with Homebrew
-brew install ffmpeg
-
-# On Windows with Chocolatey
-choco install ffmpeg
-```
-
-
-"""
npcpy/data/image.py CHANGED
@@ -11,25 +11,25 @@ from PIL import Image
 def _windows_snip_to_file(file_path: str) -> bool:
     """Helper function to trigger Windows snipping and save to file."""
     try:
-        # Import Windows-specific modules only when needed
+
         import win32clipboard
         from PIL import ImageGrab
         from ctypes import windll

-        # Simulate Windows + Shift + S
-        windll.user32.keybd_event(0x5B, 0, 0, 0)  # WIN down
-        windll.user32.keybd_event(0x10, 0, 0, 0)  # SHIFT down
-        windll.user32.keybd_event(0x53, 0, 0, 0)  # S down
-        windll.user32.keybd_event(0x53, 0, 0x0002, 0)  # S up
-        windll.user32.keybd_event(0x10, 0, 0x0002, 0)  # SHIFT up
-        windll.user32.keybd_event(0x5B, 0, 0x0002, 0)  # WIN up
+
+        windll.user32.keybd_event(0x5B, 0, 0, 0)
+        windll.user32.keybd_event(0x10, 0, 0, 0)
+        windll.user32.keybd_event(0x53, 0, 0, 0)
+        windll.user32.keybd_event(0x53, 0, 0x0002, 0)
+        windll.user32.keybd_event(0x10, 0, 0x0002, 0)
+        windll.user32.keybd_event(0x5B, 0, 0x0002, 0)

-        # Wait for user to complete the snip
+
         print("Please select an area to capture...")
-        time.sleep(1)  # Give a moment for snipping jinx to start
+        time.sleep(1)

-        # Keep checking clipboard for new image
-        max_wait = 30  # Maximum seconds to wait
+
+        max_wait = 30
         start_time = time.time()

         while time.time() - start_time < max_wait:
@@ -49,36 +49,41 @@ def _windows_snip_to_file(file_path: str) -> bool:
         return False


-def capture_screenshot(npc: Any = None, full=False) -> Dict[str, str]:
+def capture_screenshot( full=False) -> Dict[str, str]:
     """
     Function Description:
         This function captures a screenshot of the current screen and saves it to a file.
     Args:
         npc: The NPC object representing the current NPC.
-        full: Boolean to determine if full screen capture is needed
+        full: Boolean to determine if full screen capture is needed. Default to true.
+        path: Optional path to save the screenshot. Must not use placeholders. Relative paths preferred if the user specifies they want a specific path, otherwise default to None.
     Returns:
         A dictionary containing the filename, file path, and model kwargs.
     """
-    # Ensure the directory exists
+
+
     directory = os.path.expanduser("~/.npcsh/screenshots")
-    os.makedirs(directory, exist_ok=True)
+    timestamp = time.strftime("%Y%m%d_%H%M%S")
+    filename = f"screenshot_{timestamp}.png"

-    # Generate a unique filename
-    filename = f"screenshot_{int(time.time())}.png"
     file_path = os.path.join(directory, filename)
+    os.makedirs(directory, exist_ok=True)
+
+
+
+

     system = platform.system()
+
     model_kwargs = {}

-    if npc is not None:
-        if npc.provider is not None:
-            model_kwargs["provider"] = npc.provider
-        if npc.model is not None:
-            model_kwargs["model"] = npc.model

     if full:
-        if system == "Darwin":
-            subprocess.run(["screencapture", file_path])
+
+        if system.lower() == "darwin":
+
+            subprocess.run(["screencapture", file_path], capture_output=True)
+
         elif system == "Linux":
             if (
                 subprocess.run(
@@ -95,39 +100,35 @@ def capture_screenshot(npc: Any = None, full=False) -> Dict[str, str]:
                 subprocess.Popen(["scrot", file_path])
                 while not os.path.exists(file_path):
                     time.sleep(0.5)
-            else:
-                print(
-                    "No supported screenshot jinx found. Please install gnome-screenshot or scrot."
-                )
-                return None
+
         elif system == "Windows":
-            # For full screen on Windows, we'll use a different approach
+
             try:
                 import win32gui
                 import win32ui
                 import win32con
                 from PIL import Image

-                # Get screen dimensions
+
                 width = win32api.GetSystemMetrics(win32con.SM_CXVIRTUALSCREEN)
                 height = win32api.GetSystemMetrics(win32con.SM_CYVIRTUALSCREEN)

-                # Create device context
+
                 hdesktop = win32gui.GetDesktopWindow()
                 desktop_dc = win32gui.GetWindowDC(hdesktop)
                 img_dc = win32ui.CreateDCFromHandle(desktop_dc)
                 mem_dc = img_dc.CreateCompatibleDC()

-                # Create bitmap
+
                 screenshot = win32ui.CreateBitmap()
                 screenshot.CreateCompatibleBitmap(img_dc, width, height)
                 mem_dc.SelectObject(screenshot)
                 mem_dc.BitBlt((0, 0), (width, height), img_dc, (0, 0), win32con.SRCCOPY)

-                # Save
+
                 screenshot.SaveBitmapFile(mem_dc, file_path)

-                # Cleanup
+
                 mem_dc.DeleteDC()
                 win32gui.DeleteObject(screenshot.GetHandle())

@@ -172,7 +173,7 @@ def capture_screenshot(npc: Any = None, full=False) -> Dict[str, str]:
         print(f"Unsupported operating system: {system}")
         return None

-    # Check if screenshot was successfully saved
+
     if os.path.exists(file_path):
         print(f"Screenshot saved to: {file_path}")
         return {
@@ -185,24 +186,24 @@ def capture_screenshot(npc: Any = None, full=False) -> Dict[str, str]:
         return None

 def compress_image(image_bytes, max_size=(800, 600)):
-    # Create a copy of the bytes in memory
+
     buffer = io.BytesIO(image_bytes)
     img = Image.open(buffer)

-    # Force loading of image data
+
     img.load()

-    # Convert RGBA to RGB if necessary
+
     if img.mode == "RGBA":
         background = Image.new("RGB", img.size, (255, 255, 255))
         background.paste(img, mask=img.split()[3])
         img = background

-    # Resize if needed
+
     if img.size[0] > max_size[0] or img.size[1] > max_size[1]:
         img.thumbnail(max_size)

-    # Save with minimal compression
+
     out_buffer = io.BytesIO()
     img.save(out_buffer, format="JPEG", quality=95, optimize=False)
     return out_buffer.getvalue()
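`capture_screenshot` no longer takes an `npc` argument and now writes timestamped files under `~/.npcsh/screenshots`. A minimal usage sketch; the return-dict key names are inferred from the docstring ("filename, file path, and model kwargs") and should be treated as an assumption.

```python
# Sketch: calling the reworked capture_screenshot() from npcpy/data/image.py.
# The "file_path" key is inferred from the docstring, not confirmed by the diff.
from npcpy.data.image import capture_screenshot

shot = capture_screenshot(full=True)  # full-screen capture on macOS/Linux/Windows
if shot:
    print("saved to:", shot.get("file_path"))
```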
npcpy/data/load.py CHANGED
@@ -1,37 +1,45 @@
-import fitz  # PyMuPDF
+import fitz
 import pandas as pd
 import json
 import io
 from PIL import Image
 import numpy as np
 from typing import Optional
-
 import os

+try:
+    from docx import Document
+except ImportError:
+    Document = None
+
+try:
+    from pptx import Presentation
+except ImportError:
+    Presentation = None
+
+try:
+    from bs4 import BeautifulSoup
+except ImportError:
+    BeautifulSoup = None
+
 def load_csv(file_path):
     df = pd.read_csv(file_path)
     return df

-
 def load_json(file_path):
-    with open(file_path, "r") as f:
+    with open(file_path, "r", encoding='utf-8') as f:
         data = json.load(f)
-    df = pd.DataFrame(data)
-    return df
-
+    return data

 def load_txt(file_path):
-    with open(file_path, "r") as f:
+    with open(file_path, "r", encoding='utf-8') as f:
         text = f.read()
-    df = pd.DataFrame({"text": [text]})
-    return df
-
+    return text

 def load_excel(file_path):
     df = pd.read_excel(file_path)
     return df

-
 def load_image(file_path):
     img = Image.open(file_path)
     img_array = np.array(img)
@@ -44,45 +52,37 @@ def load_image(file_path):
     )
     return df

-
 def load_pdf(file_path):
     pdf_document = fitz.open(file_path)
-    texts = []
-    images = []
-
-    for page_num, page in enumerate(pdf_document):
-        # Extract text
-        text = page.get_text()
-        texts.append({"page": page_num + 1, "content": text})
-
-        # Extract images
-        image_list = page.get_images(full=True)
-        for img_index, img in enumerate(image_list):
-            xref = img[0]
-            base_image = pdf_document.extract_image(xref)
-            image_bytes = base_image["image"]
-
-            # Convert image to numpy array
-            image = Image.open(io.BytesIO(image_bytes))
-            img_array = np.array(image)
-
-            images.append(
-                {
-                    "page": page_num + 1,
-                    "index": img_index + 1,
-                    "array": img_array.tobytes(),
-                    "shape": img_array.shape,
-                    "dtype": str(img_array.dtype),
-                }
-            )
-
-    # Create DataFrame
-    df = pd.DataFrame(
-        {"texts": json.dumps(texts), "images": json.dumps(images)}, index=[0]
-    )
-
-    return df
-
+    full_text = ""
+    for page in pdf_document:
+        full_text += page.get_text() + "\n"
+    return full_text
+
+def load_docx(file_path):
+    if Document is None:
+        raise ImportError("Please install python-docx to load .docx files.")
+    doc = Document(file_path)
+    full_text = "\n".join([para.text for para in doc.paragraphs])
+    return full_text
+
+def load_pptx(file_path):
+    if Presentation is None:
+        raise ImportError("Please install python-pptx to load .pptx files.")
+    prs = Presentation(file_path)
+    full_text = ""
+    for slide in prs.slides:
+        for shape in slide.shapes:
+            if hasattr(shape, "text"):
+                full_text += shape.text + "\n"
+    return full_text
+
+def load_html(file_path):
+    if BeautifulSoup is None:
+        raise ImportError("Please install beautifulsoup4 to load .html files.")
+    with open(file_path, 'r', encoding='utf-8') as f:
+        soup = BeautifulSoup(f, 'html.parser')
+    return soup.get_text(separator='\n', strip=True)

 extension_map = {
     "PNG": "images",
@@ -96,100 +96,59 @@ extension_map = {
     "WMV": "videos",
     "MPG": "videos",
     "MPEG": "videos",
-    "DOC": "documents",
     "DOCX": "documents",
-    "PDF": "documents",
-    "PPT": "documents",
     "PPTX": "documents",
-    "XLS": "documents",
+    "PDF": "documents",
     "XLSX": "documents",
     "TXT": "documents",
     "CSV": "documents",
+    "MD": "documents",
+    "HTML": "documents",
+    "HTM": "documents",
     "ZIP": "archives",
     "RAR": "archives",
     "7Z": "archives",
     "TAR": "archives",
     "GZ": "archives",
-    "BZ2": "archives",
-    "ISO": "archives",
 }

-
-def load_file_contents(file_path, chunk_size=250):
-    """
-    Load and format the contents of a file based on its extension.
-    Returns a list of chunks from the file content.
-    """
+def load_file_contents(file_path, chunk_size=None):
     file_ext = os.path.splitext(file_path)[1].upper().lstrip('.')
-    chunks = []
-
+    full_content = ""
+    if not isinstance(chunk_size, int):
+        chunk_size=250
     try:
         if file_ext == 'PDF':
-            # Load PDF content
-            pdf_document = fitz.open(file_path)
-            full_text = ""
-
-            # Extract text from each page
-            for page in pdf_document:
-                full_text += page.get_text() + "\n\n"
-
-            # Chunk the text
-            for i in range(0, len(full_text), chunk_size):
-                chunk = full_text[i:i+chunk_size].strip()
-                if chunk:  # Skip empty chunks
-                    chunks.append(chunk)
-
+            full_content = load_pdf(file_path)
+        elif file_ext == 'DOCX':
+            full_content = load_docx(file_path)
+        elif file_ext == 'PPTX':
+            full_content = load_pptx(file_path)
+        elif file_ext in ['HTML', 'HTM']:
+            full_content = load_html(file_path)
         elif file_ext == 'CSV':
-            df = pd.read_csv(file_path)
-            # Add metadata as first chunk
-            meta = f"CSV Columns: {', '.join(df.columns)}\nRows: {len(df)}"
-            chunks.append(meta)
-
-            # Convert sample data to string and chunk it
-            sample = df.head(20).to_string()
-            for i in range(0, len(sample), chunk_size):
-                chunk = sample[i:i+chunk_size].strip()
-                if chunk:
-                    chunks.append(chunk)
-
+            df = load_csv(file_path)
+            full_content = df.to_string()
         elif file_ext in ['XLS', 'XLSX']:
-            df = pd.read_excel(file_path)
-            # Add metadata as first chunk
-            meta = f"Excel Columns: {', '.join(df.columns)}\nRows: {len(df)}"
-            chunks.append(meta)
-
-            # Convert sample data to string and chunk it
-            sample = df.head(20).to_string()
-            for i in range(0, len(sample), chunk_size):
-                chunk = sample[i:i+chunk_size].strip()
-                if chunk:
-                    chunks.append(chunk)
-
-        elif file_ext == 'TXT':
-            with open(file_path, 'r', encoding='utf-8') as f:
-                content = f.read()
-
-            # Chunk the text
-            for i in range(0, len(content), chunk_size):
-                chunk = content[i:i+chunk_size].strip()
-                if chunk:
-                    chunks.append(chunk)
-
+            df = load_excel(file_path)
+            full_content = df.to_string()
+        elif file_ext in ['TXT', 'MD', 'PY', 'JSX', 'TSX', 'TS', 'JS', 'JSON', 'SQL', 'NPC', 'JINX', 'LINE', 'YAML', 'DART', 'JAVA']:
+            full_content = load_txt(file_path)
         elif file_ext == 'JSON':
-            with open(file_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-            content = json.dumps(data, indent=2)
-
-            # Chunk the JSON
-            for i in range(0, len(content), chunk_size):
-                chunk = content[i:i+chunk_size].strip()
-                if chunk:
-                    chunks.append(chunk)
-
+            data = load_json(file_path)
+            full_content = json.dumps(data, indent=2)
         else:
-            chunks.append(f"Unsupported file format: {file_ext}")
-
+            return [f"Unsupported file format for content loading: {file_ext}"]
+
+        if not full_content:
+            return []
+
+        chunks = []
+        for i in range(0, len(full_content), chunk_size):
+            chunk = full_content[i:i+chunk_size].strip()
+            if chunk:
+                chunks.append(chunk)
         return chunks

     except Exception as e:
-        return [f"Error loading file {file_path}: {str(e)}"]
+        return [f"Error loading file {file_path}: {str(e)}"]