kopipasta 0.38.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- kopipasta/__init__.py +0 -0
- kopipasta/cache.py +40 -0
- kopipasta/file.py +225 -0
- kopipasta/import_parser.py +356 -0
- kopipasta/main.py +1449 -0
- kopipasta/prompt.py +174 -0
- kopipasta/tree_selector.py +791 -0
- kopipasta-0.38.0.dist-info/LICENSE +21 -0
- kopipasta-0.38.0.dist-info/METADATA +111 -0
- kopipasta-0.38.0.dist-info/RECORD +13 -0
- kopipasta-0.38.0.dist-info/WHEEL +5 -0
- kopipasta-0.38.0.dist-info/entry_points.txt +2 -0
- kopipasta-0.38.0.dist-info/top_level.txt +1 -0
kopipasta/__init__.py
ADDED
File without changes
kopipasta/cache.py
ADDED
@@ -0,0 +1,40 @@
import json
import os
from pathlib import Path
from typing import List, Tuple

# Define FileTuple for type hinting
FileTuple = Tuple[str, bool, List[str] | None, str]


def get_cache_file_path() -> Path:
    """Gets the cross-platform path to the cache file for the last selection."""
    cache_dir = Path.home() / ".cache" / "kopipasta"
    cache_dir.mkdir(parents=True, exist_ok=True)
    return cache_dir / "last_selection.json"


def save_selection_to_cache(files_to_include: List[FileTuple]):
    """Saves the list of selected file relative paths to the cache."""
    cache_file = get_cache_file_path()
    relative_paths = sorted([os.path.relpath(f[0]) for f in files_to_include])
    try:
        with open(cache_file, "w", encoding="utf-8") as f:
            json.dump(relative_paths, f, indent=2)
    except IOError as e:
        print(f"\nWarning: Could not save selection to cache: {e}")


def load_selection_from_cache() -> List[str]:
    """Loads the list of selected files from the cache file."""
    cache_file = get_cache_file_path()
    if not cache_file.exists():
        return []
    try:
        with open(cache_file, "r", encoding="utf-8") as f:
            paths = json.load(f)
        # Filter out paths that no longer exist
        return [p for p in paths if os.path.exists(p)]
    except (IOError, json.JSONDecodeError) as e:
        print(f"\nWarning: Could not load previous selection from cache: {e}")
        return []
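
For context: cache.py persists the previous file selection as a JSON list of relative paths under ~/.cache/kopipasta. A minimal round-trip sketch, not taken from the package docs; only the path in position 0 of each FileTuple is interpreted by the cache, so the remaining tuple fields below are placeholders:

from kopipasta.cache import load_selection_from_cache, save_selection_to_cache

# Hypothetical selection; the non-path FileTuple fields are not read by the cache.
selection = [
    ("./src/app.py", False, None, "full"),
    ("./README.md", False, None, "full"),
]
save_selection_to_cache(selection)

# On a later run, paths that no longer exist on disk are filtered out by the loader.
print(load_selection_from_cache())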
kopipasta/file.py
ADDED
@@ -0,0 +1,225 @@
import fnmatch
import os
from typing import List, Optional, Tuple, Set
from pathlib import Path

FileTuple = Tuple[str, bool, Optional[List[str]], str]

# --- Caches ---
_gitignore_cache: dict[str, list[str]] = {}
_is_ignored_cache: dict[str, bool] = {}
_is_binary_cache: dict[str, bool] = {}

# --- Known File Extensions for is_binary ---
# Using sets for O(1) average time complexity lookups
TEXT_EXTENSIONS = {
    # Code
    ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".c", ".cpp", ".h", ".hpp",
    ".cs", ".go", ".rs", ".sh", ".bash", ".ps1", ".rb", ".php", ".swift",
    ".kt", ".kts", ".scala", ".pl", ".pm", ".tcl",
    # Markup & Data
    ".html", ".htm", ".xml", ".css", ".scss", ".sass", ".less", ".json",
    ".yaml", ".yml", ".toml", ".ini", ".cfg", ".conf", ".md", ".txt", ".rtf",
    ".csv", ".tsv", ".sql", ".graphql", ".gql",
    # Config & Other
    ".gitignore", ".dockerfile", "dockerfile", ".env", ".properties", ".mdx",
}

BINARY_EXTENSIONS = {
    # Images
    ".png", ".jpg", ".jpeg", ".gif", ".bmp", ".tiff", ".ico", ".webp", ".svg",
    # Audio/Video
    ".mp3", ".wav", ".ogg", ".flac", ".mp4", ".avi", ".mov", ".wmv", ".mkv",
    # Archives
    ".zip", ".rar", ".7z", ".tar", ".gz", ".bz2", ".xz",
    # Documents
    ".pdf", ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx", ".odt",
    # Executables & Compiled
    ".exe", ".dll", ".so", ".dylib", ".class", ".jar", ".pyc", ".pyd", ".whl",
    # Databases & Other
    ".db", ".sqlite", ".sqlite3", ".db-wal", ".db-shm", ".lock",
    ".bak", ".swo", ".swp",
}

def _read_gitignore_patterns(gitignore_path: str) -> list[str]:
    """Reads patterns from a single .gitignore file and caches them."""
    if gitignore_path in _gitignore_cache:
        return _gitignore_cache[gitignore_path]
    if not os.path.isfile(gitignore_path):
        _gitignore_cache[gitignore_path] = []
        return []
    patterns = []
    try:
        with open(gitignore_path, "r", encoding="utf-8") as f:
            for line in f:
                stripped_line = line.strip()
                if stripped_line and not stripped_line.startswith("#"):
                    patterns.append(stripped_line)
    except IOError:
        pass
    _gitignore_cache[gitignore_path] = patterns
    return patterns


def is_ignored(
    path: str, default_ignore_patterns: list[str], project_root: Optional[str] = None
) -> bool:
    """
    Checks if a path should be ignored by splitting patterns into fast (basename)
    and slow (full path) checks, with heavy caching and optimized inner loops.
    """
    path_abs = os.path.abspath(path)
    if path_abs in _is_ignored_cache:
        return _is_ignored_cache[path_abs]

    parent_dir = os.path.dirname(path_abs)
    if parent_dir != path_abs and _is_ignored_cache.get(parent_dir, False):
        _is_ignored_cache[path_abs] = True
        return True

    if project_root is None:
        project_root = os.getcwd()
    project_root_abs = os.path.abspath(project_root)

    basename_patterns, path_patterns = get_all_patterns(
        default_ignore_patterns, path_abs, project_root_abs
    )

    # --- Step 1: Fast check for basename patterns ---
    path_basename = os.path.basename(path_abs)
    for pattern in basename_patterns:
        if fnmatch.fnmatch(path_basename, pattern):
            _is_ignored_cache[path_abs] = True
            return True

    # --- Step 2: Optimized nested check for path patterns ---
    try:
        path_rel_to_root = os.path.relpath(path_abs, project_root_abs)
    except ValueError:
        _is_ignored_cache[path_abs] = False
        return False

    # Pre-calculate all path prefixes to check, avoiding re-joins in the loop.
    path_parts = Path(path_rel_to_root).parts
    path_prefixes = [os.path.join(*path_parts[:i]) for i in range(1, len(path_parts) + 1)]

    # Pre-process patterns to remove trailing slashes once.
    processed_path_patterns = [p.rstrip("/") for p in path_patterns]

    for prefix in path_prefixes:
        for pattern in processed_path_patterns:
            if fnmatch.fnmatch(prefix, pattern):
                _is_ignored_cache[path_abs] = True
                return True

    _is_ignored_cache[path_abs] = False
    return False

def get_all_patterns(default_ignore_patterns, path_abs, project_root_abs) -> Tuple[Set[str], Set[str]]:
    """
    Gathers all applicable ignore patterns, splitting them into two sets
    for optimized checking: one for basenames, one for full paths.
    """
    basename_patterns = set()
    path_patterns = set()

    for p in default_ignore_patterns:
        if "/" in p:
            path_patterns.add(p)
        else:
            basename_patterns.add(p)

    search_start_dir = (
        path_abs if os.path.isdir(path_abs) else os.path.dirname(path_abs)
    )

    current_dir = search_start_dir
    while True:
        gitignore_path = os.path.join(current_dir, ".gitignore")
        patterns_from_file = _read_gitignore_patterns(gitignore_path)

        if patterns_from_file:
            gitignore_dir_rel = os.path.relpath(current_dir, project_root_abs)
            if gitignore_dir_rel == ".":
                gitignore_dir_rel = ""

            for p in patterns_from_file:
                if "/" in p:
                    # Path patterns are relative to the .gitignore file's location
                    path_patterns.add(os.path.join(gitignore_dir_rel, p.lstrip("/")))
                else:
                    basename_patterns.add(p)

        if (
            not current_dir.startswith(project_root_abs)
            or current_dir == project_root_abs
        ):
            break
        parent = os.path.dirname(current_dir)
        if parent == current_dir:
            break
        current_dir = parent
    return basename_patterns, path_patterns


def read_file_contents(file_path):
    try:
        with open(file_path, "r") as file:
            return file.read()
    except Exception as e:
        print(f"Error reading {file_path}: {e}")
        return ""


def is_binary(file_path: str) -> bool:
    """
    Efficiently checks if a file is binary.

    The check follows a fast, multi-step process to minimize I/O:
    1. Checks a memory cache for a previously determined result.
    2. Checks the file extension against a list of known text file types.
    3. Checks the file extension against a list of known binary file types.
    4. As a last resort, reads the first 512 bytes of the file to check for
       a null byte, a common indicator of a binary file.
    """
    # Step 1: Check cache first for fastest response
    if file_path in _is_binary_cache:
        return _is_binary_cache[file_path]

    # Step 2: Fast check based on known text/binary extensions (no I/O)
    _, extension = os.path.splitext(file_path)
    extension = extension.lower()

    if extension in TEXT_EXTENSIONS:
        _is_binary_cache[file_path] = False
        return False
    if extension in BINARY_EXTENSIONS:
        _is_binary_cache[file_path] = True
        return True

    # Step 3: Fallback to content analysis for unknown extensions
    try:
        with open(file_path, "rb") as file:
            # Read a smaller chunk, 512 bytes is usually enough to find a null byte
            chunk = file.read(512)
            if b"\0" in chunk:
                _is_binary_cache[file_path] = True
                return True
            # If no null byte, assume it's a text file
            _is_binary_cache[file_path] = False
            return False
    except IOError:
        # If we can't open it, treat it as binary to be safe
        _is_binary_cache[file_path] = True
        return True


def get_human_readable_size(size):
    for unit in ["B", "KB", "MB", "GB", "TB"]:
        if size < 1024.0:
            return f"{size:.2f} {unit}"
        size /= 1024.0


def is_large_file(file_path, threshold=102400):
    return os.path.getsize(file_path) > threshold
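
file.py centralizes the ignore and binary-detection heuristics: is_ignored splits patterns into basename and path checks fed by every .gitignore between the file and the project root, and is_binary falls back to a 512-byte null-byte probe only for unknown extensions. A hedged usage sketch of the public helpers (the pattern list and directory layout are invented):

import os

from kopipasta.file import (
    get_human_readable_size,
    is_binary,
    is_ignored,
    is_large_file,
)

default_ignore_patterns = ["node_modules", "*.pyc", "build/"]

for root, dirs, files in os.walk("."):
    for name in files:
        path = os.path.join(root, name)
        # Skip ignored and binary files, mirroring what a file picker would do.
        if is_ignored(path, default_ignore_patterns) or is_binary(path):
            continue
        marker = " (large)" if is_large_file(path) else ""
        print(f"{path}: {get_human_readable_size(os.path.getsize(path))}{marker}")

Note the module-level caches: both checks are memoized for the lifetime of the process, so repeated walks are cheap, but changes on disk are not picked up until the process restarts.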
kopipasta/import_parser.py
ADDED
@@ -0,0 +1,356 @@
import os
import re
import json
import ast
from typing import Dict, List, Optional, Set, Tuple

# --- Global Cache for tsconfig.json data ---
# Key: absolute path to tsconfig.json file
# Value: Tuple (absolute_base_url: Optional[str], alias_paths_map: Dict[str, List[str]])
_tsconfig_configs_cache: Dict[str, Tuple[Optional[str], Dict[str, List[str]]]] = {}


# --- TypeScript Alias and Import Resolution ---


def find_relevant_tsconfig_path(
    file_path_abs: str, project_root_abs: str
) -> Optional[str]:
    """
    Finds the most relevant tsconfig.json by searching upwards from the file's directory,
    stopping at project_root_abs.
    Searches for 'tsconfig.json' first, then 'tsconfig.*.json' in each directory.
    """
    current_dir = os.path.dirname(os.path.normpath(file_path_abs))
    project_root_abs_norm = os.path.normpath(project_root_abs)

    while current_dir.startswith(project_root_abs_norm) and len(current_dir) >= len(
        project_root_abs_norm
    ):
        potential_tsconfig = os.path.join(current_dir, "tsconfig.json")
        if os.path.isfile(potential_tsconfig):
            return os.path.normpath(potential_tsconfig)

        try:
            variant_tsconfigs = sorted(
                [
                    f
                    for f in os.listdir(current_dir)
                    if f.startswith("tsconfig.")
                    and f.endswith(".json")
                    and os.path.isfile(os.path.join(current_dir, f))
                ]
            )
            if variant_tsconfigs:
                return os.path.normpath(os.path.join(current_dir, variant_tsconfigs[0]))
        except OSError:
            pass

        if current_dir == project_root_abs_norm:
            break

        parent_dir = os.path.dirname(current_dir)
        if parent_dir == current_dir:
            break
        current_dir = parent_dir
    return None


def load_tsconfig_config(
    tsconfig_path_abs: str,
) -> Tuple[Optional[str], Dict[str, List[str]]]:
    """
    Loads baseUrl and paths from a specific tsconfig.json.
    Caches results.
    Returns (absolute_base_url, paths_map).
    """
    if tsconfig_path_abs in _tsconfig_configs_cache:
        return _tsconfig_configs_cache[tsconfig_path_abs]

    if not os.path.isfile(tsconfig_path_abs):
        _tsconfig_configs_cache[tsconfig_path_abs] = (None, {})
        return None, {}

    try:
        with open(tsconfig_path_abs, "r", encoding="utf-8") as f:
            content = f.read()
        content = re.sub(r"//.*?\n", "\n", content)
        content = re.sub(r"/\*.*?\*/", "", content, flags=re.DOTALL)
        config = json.loads(content)

        compiler_options = config.get("compilerOptions", {})
        tsconfig_dir = os.path.dirname(tsconfig_path_abs)
        base_url_from_config = compiler_options.get("baseUrl", ".")
        abs_base_url = os.path.normpath(
            os.path.join(tsconfig_dir, base_url_from_config)
        )

        paths = compiler_options.get("paths", {})
        processed_paths = {
            key: (val if isinstance(val, list) else [val]) for key, val in paths.items()
        }

        # print(f"DEBUG: Loaded config from {os.path.relpath(tsconfig_path_abs)}: effective abs_baseUrl='{abs_base_url}', {len(processed_paths)} path alias(es).")
        _tsconfig_configs_cache[tsconfig_path_abs] = (abs_base_url, processed_paths)
        return abs_base_url, processed_paths
    except Exception as e:
        print(f"Warning: Could not parse {os.path.relpath(tsconfig_path_abs)}: {e}")
        _tsconfig_configs_cache[tsconfig_path_abs] = (None, {})
        return None, {}


def _probe_ts_path_candidates(candidate_base_path_abs: str) -> Optional[str]:
    """
    Given a candidate base absolute path, tries to find a corresponding file.
    """
    possible_extensions = [".ts", ".tsx", ".js", ".jsx", ".json"]

    if os.path.isfile(candidate_base_path_abs):
        return candidate_base_path_abs

    stem, original_ext = os.path.splitext(candidate_base_path_abs)
    base_for_ext_check = (
        stem if original_ext.lower() in possible_extensions else candidate_base_path_abs
    )

    for ext in possible_extensions:
        path_with_ext = base_for_ext_check + ext
        if os.path.isfile(path_with_ext):
            return path_with_ext

    if os.path.isdir(base_for_ext_check):
        for ext in possible_extensions:
            index_file_path = os.path.join(base_for_ext_check, "index" + ext)
            if os.path.isfile(index_file_path):
                return index_file_path
    return None


def resolve_ts_import_path(
    import_str: str,
    current_file_dir_abs: str,
    abs_base_url: Optional[str],
    alias_map: Dict[str, List[str]],
) -> Optional[str]:
    """
    Resolves a TypeScript import string to an absolute file path.
    """
    candidate_targets_abs: List[str] = []
    sorted_alias_keys = sorted(alias_map.keys(), key=len, reverse=True)
    alias_matched_and_resolved = False

    for alias_pattern in sorted_alias_keys:
        alias_prefix_pattern = alias_pattern.replace("/*", "")
        if import_str.startswith(alias_prefix_pattern):
            import_suffix = import_str[len(alias_prefix_pattern) :]
            for mapping_path_template_list in alias_map[alias_pattern]:
                for mapping_path_template in (
                    mapping_path_template_list
                    if isinstance(mapping_path_template_list, list)
                    else [mapping_path_template_list]
                ):
                    if "/*" in alias_pattern:
                        resolved_relative_to_base = mapping_path_template.replace(
                            "*", import_suffix, 1
                        )
                    else:
                        resolved_relative_to_base = mapping_path_template
                    if abs_base_url:
                        abs_candidate = os.path.normpath(
                            os.path.join(abs_base_url, resolved_relative_to_base)
                        )
                        candidate_targets_abs.append(abs_candidate)
                    else:
                        print(
                            f"Warning: TS Alias '{alias_pattern}' used, but no abs_base_url for context of '{current_file_dir_abs}'."
                        )
            if candidate_targets_abs:
                alias_matched_and_resolved = True
                break

    if not alias_matched_and_resolved and import_str.startswith("."):
        abs_candidate = os.path.normpath(os.path.join(current_file_dir_abs, import_str))
        candidate_targets_abs.append(abs_candidate)
    elif (
        not alias_matched_and_resolved
        and abs_base_url
        and not import_str.startswith(".")
    ):
        abs_candidate = os.path.normpath(os.path.join(abs_base_url, import_str))
        candidate_targets_abs.append(abs_candidate)

    for cand_abs_path in candidate_targets_abs:
        resolved_file = _probe_ts_path_candidates(cand_abs_path)
        if resolved_file:
            return resolved_file
    return None


def parse_typescript_imports(
    file_content: str, file_path_abs: str, project_root_abs: str
) -> Set[str]:
    resolved_imports_abs_paths = set()
    relevant_tsconfig_abs_path = find_relevant_tsconfig_path(
        file_path_abs, project_root_abs
    )

    abs_base_url, alias_map = None, {}
    if relevant_tsconfig_abs_path:
        abs_base_url, alias_map = load_tsconfig_config(relevant_tsconfig_abs_path)
    else:
        # print(f"Warning: No tsconfig.json found for {os.path.relpath(file_path_abs, project_root_abs)}. Import resolution might be limited.")
        abs_base_url = project_root_abs

    import_regex = re.compile(
        r"""
        (?:import|export)
        (?:\s+(?:type\s+)?(?:[\w*{}\s,\[\]:\."'`-]+)\s+from)?
        \s*['"`]([^'"\n`]+?)['"`]
        |require\s*\(\s*['"`]([^'"\n`]+?)['"`]\s*\)
        |import\s*\(\s*['"`]([^'"\n`]+?)['"`]\s*\)
        """,
        re.VERBOSE | re.MULTILINE,
    )

    current_file_dir_abs = os.path.dirname(file_path_abs)

    for match in import_regex.finditer(file_content):
        import_str_candidate = next((g for g in match.groups() if g is not None), None)
        if import_str_candidate:
            is_likely_external = (
                not import_str_candidate.startswith((".", "/"))
                and not any(
                    import_str_candidate.startswith(alias_pattern.replace("/*", ""))
                    for alias_pattern in alias_map
                )
                and not (
                    abs_base_url
                    and os.path.exists(os.path.join(abs_base_url, import_str_candidate))
                )
                and (
                    import_str_candidate.count("/") == 0
                    or (
                        import_str_candidate.startswith("@")
                        and import_str_candidate.count("/") == 1
                    )
                )
                and "." not in import_str_candidate.split("/")[0]
            )
            if is_likely_external:
                continue

            resolved_abs_path = resolve_ts_import_path(
                import_str_candidate, current_file_dir_abs, abs_base_url, alias_map
            )

            if resolved_abs_path:
                norm_resolved_path = os.path.normpath(resolved_abs_path)
                if norm_resolved_path.startswith(os.path.normpath(project_root_abs)):
                    resolved_imports_abs_paths.add(norm_resolved_path)
    return resolved_imports_abs_paths


# --- Python Import Resolution ---


def resolve_python_import(
    module_name_parts: List[str],
    current_file_dir_abs: str,
    project_root_abs: str,
    level: int,
) -> Optional[str]:
    base_path_to_search = ""
    if level > 0:
        base_path_to_search = current_file_dir_abs
        for _ in range(level - 1):
            base_path_to_search = os.path.dirname(base_path_to_search)
    else:
        base_path_to_search = project_root_abs

    candidate_rel_path = os.path.join(*module_name_parts)
    potential_abs_path = os.path.join(base_path_to_search, candidate_rel_path)

    py_file = potential_abs_path + ".py"
    if os.path.isfile(py_file):
        return os.path.normpath(py_file)

    init_file = os.path.join(potential_abs_path, "__init__.py")
    if os.path.isdir(potential_abs_path) and os.path.isfile(init_file):
        return os.path.normpath(init_file)

    if level == 0 and base_path_to_search == project_root_abs:
        src_base_path = os.path.join(project_root_abs, "src")
        if os.path.isdir(src_base_path):
            potential_abs_path_src = os.path.join(src_base_path, candidate_rel_path)
            py_file_src = potential_abs_path_src + ".py"
            if os.path.isfile(py_file_src):
                return os.path.normpath(py_file_src)
            init_file_src = os.path.join(potential_abs_path_src, "__init__.py")
            if os.path.isdir(potential_abs_path_src) and os.path.isfile(init_file_src):
                return os.path.normpath(init_file_src)
    return None


def parse_python_imports(
    file_content: str, file_path_abs: str, project_root_abs: str
) -> Set[str]:
    resolved_imports = set()
    current_file_dir_abs = os.path.dirname(file_path_abs)

    try:
        tree = ast.parse(file_content, filename=file_path_abs)
    except SyntaxError:
        # print(f"Warning: Syntax error in {file_path_abs}, cannot parse Python imports.")
        return resolved_imports

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                module_parts = alias.name.split(".")
                resolved = resolve_python_import(
                    module_parts, current_file_dir_abs, project_root_abs, level=0
                )
                if (
                    resolved
                    and os.path.exists(resolved)
                    and os.path.normpath(resolved).startswith(
                        os.path.normpath(project_root_abs)
                    )
                ):
                    resolved_imports.add(os.path.normpath(resolved))
        elif isinstance(node, ast.ImportFrom):
            level_to_resolve = node.level
            if node.module:
                module_parts = node.module.split(".")
                resolved = resolve_python_import(
                    module_parts,
                    current_file_dir_abs,
                    project_root_abs,
                    level_to_resolve,
                )
                if (
                    resolved
                    and os.path.exists(resolved)
                    and os.path.normpath(resolved).startswith(
                        os.path.normpath(project_root_abs)
                    )
                ):
                    resolved_imports.add(os.path.normpath(resolved))
            else:
                for alias in node.names:
                    item_name_parts = alias.name.split(".")
                    resolved = resolve_python_import(
                        item_name_parts,
                        current_file_dir_abs,
                        project_root_abs,
                        level=level_to_resolve,
                    )
                    if (
                        resolved
                        and os.path.exists(resolved)
                        and os.path.normpath(resolved).startswith(
                            os.path.normpath(project_root_abs)
                        )
                    ):
                        resolved_imports.add(os.path.normpath(resolved))
    return resolved_imports
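
Both parsers return only absolute paths that resolve to existing files under the project root, which is what lets the tool offer a selected file's local dependencies for inclusion. A small sketch of calling them directly (the sample sources and paths are invented; each call returns an empty set unless the referenced files actually exist on disk, and TS aliases such as "@/..." resolve only when a tsconfig.json with a matching "paths" entry is found):

import os

from kopipasta.import_parser import parse_python_imports, parse_typescript_imports

root = os.path.abspath(".")

# Hypothetical Python module: local imports resolve via root (and a src/ fallback).
py_source = "from utils.helpers import slugify\nimport config\n"
print(parse_python_imports(py_source, os.path.join(root, "app/main.py"), root))

# Hypothetical TS module: relative, baseUrl, and alias imports are all probed.
ts_source = 'import { Button } from "@/components/Button";\n'
print(parse_typescript_imports(ts_source, os.path.join(root, "src/pages/index.tsx"), root))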