shotgun-sh 0.4.0.dev1__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. shotgun/agents/agent_manager.py +307 -8
  2. shotgun/agents/cancellation.py +103 -0
  3. shotgun/agents/common.py +12 -0
  4. shotgun/agents/config/README.md +0 -1
  5. shotgun/agents/config/manager.py +10 -7
  6. shotgun/agents/config/models.py +5 -27
  7. shotgun/agents/config/provider.py +44 -27
  8. shotgun/agents/conversation/history/token_counting/base.py +51 -9
  9. shotgun/agents/file_read.py +176 -0
  10. shotgun/agents/messages.py +15 -3
  11. shotgun/agents/models.py +24 -1
  12. shotgun/agents/router/models.py +8 -0
  13. shotgun/agents/router/tools/delegation_tools.py +55 -1
  14. shotgun/agents/router/tools/plan_tools.py +88 -7
  15. shotgun/agents/runner.py +17 -2
  16. shotgun/agents/tools/__init__.py +8 -0
  17. shotgun/agents/tools/codebase/directory_lister.py +27 -39
  18. shotgun/agents/tools/codebase/file_read.py +26 -35
  19. shotgun/agents/tools/codebase/query_graph.py +9 -0
  20. shotgun/agents/tools/codebase/retrieve_code.py +9 -0
  21. shotgun/agents/tools/file_management.py +32 -2
  22. shotgun/agents/tools/file_read_tools/__init__.py +7 -0
  23. shotgun/agents/tools/file_read_tools/multimodal_file_read.py +167 -0
  24. shotgun/agents/tools/markdown_tools/__init__.py +62 -0
  25. shotgun/agents/tools/markdown_tools/insert_section.py +148 -0
  26. shotgun/agents/tools/markdown_tools/models.py +86 -0
  27. shotgun/agents/tools/markdown_tools/remove_section.py +114 -0
  28. shotgun/agents/tools/markdown_tools/replace_section.py +119 -0
  29. shotgun/agents/tools/markdown_tools/utils.py +453 -0
  30. shotgun/agents/tools/registry.py +44 -6
  31. shotgun/agents/tools/web_search/openai.py +42 -23
  32. shotgun/attachments/__init__.py +41 -0
  33. shotgun/attachments/errors.py +60 -0
  34. shotgun/attachments/models.py +107 -0
  35. shotgun/attachments/parser.py +257 -0
  36. shotgun/attachments/processor.py +193 -0
  37. shotgun/build_constants.py +4 -7
  38. shotgun/cli/clear.py +2 -2
  39. shotgun/cli/codebase/commands.py +181 -65
  40. shotgun/cli/compact.py +2 -2
  41. shotgun/cli/context.py +2 -2
  42. shotgun/cli/error_handler.py +2 -2
  43. shotgun/cli/run.py +90 -0
  44. shotgun/cli/spec/backup.py +2 -1
  45. shotgun/codebase/__init__.py +2 -0
  46. shotgun/codebase/benchmarks/__init__.py +35 -0
  47. shotgun/codebase/benchmarks/benchmark_runner.py +309 -0
  48. shotgun/codebase/benchmarks/exporters.py +119 -0
  49. shotgun/codebase/benchmarks/formatters/__init__.py +49 -0
  50. shotgun/codebase/benchmarks/formatters/base.py +34 -0
  51. shotgun/codebase/benchmarks/formatters/json_formatter.py +106 -0
  52. shotgun/codebase/benchmarks/formatters/markdown.py +136 -0
  53. shotgun/codebase/benchmarks/models.py +129 -0
  54. shotgun/codebase/core/__init__.py +4 -0
  55. shotgun/codebase/core/call_resolution.py +91 -0
  56. shotgun/codebase/core/change_detector.py +11 -6
  57. shotgun/codebase/core/errors.py +159 -0
  58. shotgun/codebase/core/extractors/__init__.py +23 -0
  59. shotgun/codebase/core/extractors/base.py +138 -0
  60. shotgun/codebase/core/extractors/factory.py +63 -0
  61. shotgun/codebase/core/extractors/go/__init__.py +7 -0
  62. shotgun/codebase/core/extractors/go/extractor.py +122 -0
  63. shotgun/codebase/core/extractors/javascript/__init__.py +7 -0
  64. shotgun/codebase/core/extractors/javascript/extractor.py +132 -0
  65. shotgun/codebase/core/extractors/protocol.py +109 -0
  66. shotgun/codebase/core/extractors/python/__init__.py +7 -0
  67. shotgun/codebase/core/extractors/python/extractor.py +141 -0
  68. shotgun/codebase/core/extractors/rust/__init__.py +7 -0
  69. shotgun/codebase/core/extractors/rust/extractor.py +139 -0
  70. shotgun/codebase/core/extractors/types.py +15 -0
  71. shotgun/codebase/core/extractors/typescript/__init__.py +7 -0
  72. shotgun/codebase/core/extractors/typescript/extractor.py +92 -0
  73. shotgun/codebase/core/gitignore.py +252 -0
  74. shotgun/codebase/core/ingestor.py +644 -354
  75. shotgun/codebase/core/kuzu_compat.py +119 -0
  76. shotgun/codebase/core/language_config.py +239 -0
  77. shotgun/codebase/core/manager.py +256 -46
  78. shotgun/codebase/core/metrics_collector.py +310 -0
  79. shotgun/codebase/core/metrics_types.py +347 -0
  80. shotgun/codebase/core/parallel_executor.py +424 -0
  81. shotgun/codebase/core/work_distributor.py +254 -0
  82. shotgun/codebase/core/worker.py +768 -0
  83. shotgun/codebase/indexing_state.py +86 -0
  84. shotgun/codebase/models.py +94 -0
  85. shotgun/codebase/service.py +13 -0
  86. shotgun/exceptions.py +9 -9
  87. shotgun/main.py +3 -16
  88. shotgun/posthog_telemetry.py +165 -24
  89. shotgun/prompts/agents/file_read.j2 +48 -0
  90. shotgun/prompts/agents/partials/common_agent_system_prompt.j2 +19 -47
  91. shotgun/prompts/agents/partials/content_formatting.j2 +12 -33
  92. shotgun/prompts/agents/partials/interactive_mode.j2 +9 -32
  93. shotgun/prompts/agents/partials/router_delegation_mode.j2 +21 -22
  94. shotgun/prompts/agents/plan.j2 +14 -0
  95. shotgun/prompts/agents/router.j2 +531 -258
  96. shotgun/prompts/agents/specify.j2 +14 -0
  97. shotgun/prompts/agents/state/codebase/codebase_graphs_available.j2 +14 -1
  98. shotgun/prompts/agents/state/system_state.j2 +13 -11
  99. shotgun/prompts/agents/tasks.j2 +14 -0
  100. shotgun/settings.py +49 -10
  101. shotgun/tui/app.py +149 -18
  102. shotgun/tui/commands/__init__.py +9 -1
  103. shotgun/tui/components/attachment_bar.py +87 -0
  104. shotgun/tui/components/prompt_input.py +25 -28
  105. shotgun/tui/components/status_bar.py +14 -7
  106. shotgun/tui/dependencies.py +3 -8
  107. shotgun/tui/protocols.py +18 -0
  108. shotgun/tui/screens/chat/chat.tcss +15 -0
  109. shotgun/tui/screens/chat/chat_screen.py +766 -235
  110. shotgun/tui/screens/chat/codebase_index_prompt_screen.py +8 -4
  111. shotgun/tui/screens/chat_screen/attachment_hint.py +40 -0
  112. shotgun/tui/screens/chat_screen/command_providers.py +0 -10
  113. shotgun/tui/screens/chat_screen/history/chat_history.py +54 -14
  114. shotgun/tui/screens/chat_screen/history/formatters.py +22 -0
  115. shotgun/tui/screens/chat_screen/history/user_question.py +25 -3
  116. shotgun/tui/screens/database_locked_dialog.py +219 -0
  117. shotgun/tui/screens/database_timeout_dialog.py +158 -0
  118. shotgun/tui/screens/kuzu_error_dialog.py +135 -0
  119. shotgun/tui/screens/model_picker.py +1 -3
  120. shotgun/tui/screens/models.py +11 -0
  121. shotgun/tui/state/processing_state.py +19 -0
  122. shotgun/tui/widgets/widget_coordinator.py +18 -0
  123. shotgun/utils/file_system_utils.py +4 -1
  124. {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/METADATA +87 -34
  125. {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/RECORD +128 -79
  126. shotgun/cli/export.py +0 -81
  127. shotgun/cli/plan.py +0 -73
  128. shotgun/cli/research.py +0 -93
  129. shotgun/cli/specify.py +0 -70
  130. shotgun/cli/tasks.py +0 -78
  131. shotgun/sentry_telemetry.py +0 -232
  132. shotgun/tui/screens/onboarding.py +0 -584
  133. {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/WHEEL +0 -0
  134. {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/entry_points.txt +0 -0
  135. {shotgun_sh-0.4.0.dev1.dist-info → shotgun_sh-0.6.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,141 @@
1
+ """Python language extractor implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from shotgun.codebase.core.extractors.base import BaseExtractor
8
+ from shotgun.codebase.core.extractors.types import SupportedLanguage
9
+
10
+ if TYPE_CHECKING:
11
+ from tree_sitter import Node
12
+
13
+
14
+ class PythonExtractor(BaseExtractor):
15
+ """Extractor for Python source code."""
16
+
17
+ @property
18
+ def language(self) -> SupportedLanguage:
19
+ """The language this extractor handles."""
20
+ return SupportedLanguage.PYTHON
21
+
22
+ def _class_definition_types(self) -> list[str]:
23
+ """Return node types that represent class definitions."""
24
+ return ["class_definition"]
25
+
26
+ def _function_definition_types(self) -> list[str]:
27
+ """Return node types that represent function definitions."""
28
+ return ["function_definition"]
29
+
30
+ def extract_decorators(self, node: Node) -> list[str]:
31
+ """Extract decorators from a function or class node.
32
+
33
+ Args:
34
+ node: The AST node (function or class definition)
35
+
36
+ Returns:
37
+ List of decorator names
38
+ """
39
+ decorators: list[str] = []
40
+
41
+ for child in node.children:
42
+ if child.type == "decorator":
43
+ for grandchild in child.children:
44
+ if grandchild.type == "identifier" and grandchild.text:
45
+ decorators.append(grandchild.text.decode("utf-8"))
46
+ break
47
+ elif grandchild.type == "attribute":
48
+ attr_node = grandchild.child_by_field_name("attribute")
49
+ if attr_node and attr_node.text:
50
+ decorators.append(attr_node.text.decode("utf-8"))
51
+ break
52
+
53
+ return decorators
54
+
55
+ def extract_docstring(self, node: Node) -> str | None:
56
+ """Extract docstring from a function or class node.
57
+
58
+ Args:
59
+ node: The AST node (function or class definition)
60
+
61
+ Returns:
62
+ The docstring content, or None if not present
63
+ """
64
+ body_node = node.child_by_field_name("body")
65
+ if not body_node or not body_node.children:
66
+ return None
67
+
68
+ first_statement = body_node.children[0]
69
+ if first_statement.type == "expression_statement":
70
+ for child in first_statement.children:
71
+ if child.type == "string" and child.text:
72
+ docstring = child.text.decode("utf-8")
73
+ docstring = docstring.strip()
74
+ if (
75
+ docstring.startswith('"""')
76
+ and docstring.endswith('"""')
77
+ or docstring.startswith("'''")
78
+ and docstring.endswith("'''")
79
+ ):
80
+ docstring = docstring[3:-3]
81
+ elif (
82
+ docstring.startswith('"')
83
+ and docstring.endswith('"')
84
+ or docstring.startswith("'")
85
+ and docstring.endswith("'")
86
+ ):
87
+ docstring = docstring[1:-1]
88
+ return docstring.strip()
89
+
90
+ return None
91
+
92
+ def extract_inheritance(self, class_node: Node) -> list[str]:
93
+ """Extract parent class names from a class definition.
94
+
95
+ Args:
96
+ class_node: The class definition AST node
97
+
98
+ Returns:
99
+ List of parent class names (simple names, may need resolution)
100
+ """
101
+ parent_names: list[str] = []
102
+
103
+ for child in class_node.children:
104
+ if child.type == "argument_list":
105
+ for arg in child.children:
106
+ if arg.type == "identifier" and arg.text:
107
+ parent_names.append(arg.text.decode("utf-8"))
108
+ elif arg.type == "attribute":
109
+ full_name_parts: list[str] = []
110
+ self._extract_full_name(arg, full_name_parts)
111
+ if full_name_parts:
112
+ parent_names.append(".".join(full_name_parts))
113
+
114
+ return parent_names
115
+
116
+ def parse_call_node(self, call_node: Node) -> tuple[str | None, str | None]:
117
+ """Parse a call expression node to extract callee information.
118
+
119
+ Args:
120
+ call_node: The call expression AST node
121
+
122
+ Returns:
123
+ Tuple of (callee_name, object_name)
124
+ """
125
+ callee_name = None
126
+ object_name = None
127
+
128
+ for child in call_node.children:
129
+ if child.type == "identifier" and child.text:
130
+ callee_name = child.text.decode("utf-8")
131
+ break
132
+ elif child.type == "attribute":
133
+ obj_node = child.child_by_field_name("object")
134
+ attr_node = child.child_by_field_name("attribute")
135
+ if obj_node and obj_node.text:
136
+ object_name = obj_node.text.decode("utf-8")
137
+ if attr_node and attr_node.text:
138
+ callee_name = attr_node.text.decode("utf-8")
139
+ break
140
+
141
+ return callee_name, object_name
@@ -0,0 +1,7 @@
1
+ """Rust language extractor."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .extractor import RustExtractor
6
+
7
+ __all__ = ["RustExtractor"]
@@ -0,0 +1,139 @@
1
+ """Rust language extractor implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from shotgun.codebase.core.extractors.base import BaseExtractor
8
+ from shotgun.codebase.core.extractors.types import SupportedLanguage
9
+
10
+ if TYPE_CHECKING:
11
+ from tree_sitter import Node
12
+
13
+
14
class RustExtractor(BaseExtractor):
    """Extractor for Rust source code.

    Reads attributes, doc comments, supertraits and call targets from
    tree-sitter nodes produced by the Rust grammar.
    """

    @property
    def language(self) -> SupportedLanguage:
        """The language this extractor handles."""
        return SupportedLanguage.RUST

    def _class_definition_types(self) -> list[str]:
        """Return node types that represent type definitions.

        Rust has structs, enums, traits, and type aliases.
        """
        return ["struct_item", "enum_item", "trait_item", "type_item"]

    def _function_definition_types(self) -> list[str]:
        """Return node types that represent function definitions."""
        return ["function_item", "function_signature_item"]

    @staticmethod
    def _attribute_paths(attribute_item: Node) -> list[str]:
        """Return the attribute paths found inside one ``attribute_item``.

        Args:
            attribute_item: A node of type ``attribute_item`` (``#[...]``)

        Returns:
            Source text of each attribute's path, in order
        """
        paths: list[str] = []
        for attr_child in attribute_item.children:
            if attr_child.type != "attribute":
                continue
            path = attr_child.child_by_field_name("path")
            if path is None:
                # NOTE(review): the grammar may not expose a `path` field on
                # `attribute`; fall back to its first named child, which is
                # presumably the path - confirm against the pinned
                # tree-sitter-rust version.
                named = getattr(attr_child, "named_children", None) or []
                path = named[0] if named else None
            if path and path.text:
                paths.append(path.text.decode("utf-8"))
        return paths

    def extract_decorators(self, node: Node) -> list[str]:
        """Extract attributes from a function or struct node.

        Rust uses #[attribute] syntax. Attribute items are collected both
        from the node's children and from the run of attribute items that
        immediately precedes the node (comments in between are skipped),
        because outer attributes are parsed as preceding siblings of the
        item they annotate.

        Args:
            node: The AST node

        Returns:
            List of attribute names, in source order
        """
        attached: list[str] = []
        for child in node.children:
            if child.type == "attribute_item":
                attached.extend(self._attribute_paths(child))

        leading: list[str] = []
        sibling = getattr(node, "prev_named_sibling", None)
        while sibling is not None and sibling.type in (
            "attribute_item",
            "line_comment",
            "block_comment",
        ):
            if sibling.type == "attribute_item":
                # Walking backwards, so prepend to keep source order.
                leading[0:0] = self._attribute_paths(sibling)
            sibling = sibling.prev_named_sibling

        return leading + attached

    def extract_docstring(self, node: Node) -> str | None:
        """Extract doc comment from a function or type node.

        Rust uses /// for outer doc comments and //! for inner doc comments.
        Only ``///`` line comments directly above the item are collected;
        attribute items between the comments and the item are skipped, and
        ``////`` comments are treated as ordinary comments.

        Args:
            node: The AST node

        Returns:
            The doc comment, or None if not present
        """
        doc_lines: list[str] = []
        sibling = node.prev_named_sibling

        while sibling is not None:
            if sibling.type == "attribute_item":
                # `/// doc` followed by `#[derive(...)]` still documents
                # the item below the attribute.
                sibling = sibling.prev_named_sibling
                continue
            if sibling.type != "line_comment":
                break

            raw = sibling.text
            if raw:
                text = raw.decode("utf-8")
                # Exactly three slashes marks an outer doc comment.
                if text.startswith("///") and not text.startswith("////"):
                    doc_lines.insert(0, text[3:].strip())
                else:
                    break
            sibling = sibling.prev_named_sibling

        if doc_lines:
            return "\n".join(doc_lines)
        return None

    def extract_inheritance(self, class_node: Node) -> list[str]:
        """Extract trait bounds or supertraits from a type definition.

        For structs, this returns nothing (Rust doesn't have struct inheritance).
        For traits, this returns supertraits.

        Args:
            class_node: The type definition AST node

        Returns:
            List of supertrait names
        """
        if class_node.type != "trait_item":
            return []

        supertraits: list[str] = []
        for child in class_node.children:
            if child.type != "trait_bounds":
                continue
            for bound in child.children:
                if bound.type == "type_identifier" and bound.text:
                    supertraits.append(bound.text.decode("utf-8"))
                elif bound.type == "generic_type":
                    # For `Trait<T>` keep only the base type name.
                    type_node = bound.child_by_field_name("type")
                    if type_node and type_node.text:
                        supertraits.append(type_node.text.decode("utf-8"))

        return supertraits

    def parse_call_node(self, call_node: Node) -> tuple[str | None, str | None]:
        """Parse a call expression node to extract callee information.

        ``foo()`` yields ``("foo", None)``; ``value.field()`` yields
        ``("field", "value")``; ``path::name()`` yields ``("name", None)``.

        Args:
            call_node: The call expression AST node

        Returns:
            Tuple of (callee_name, object_name)
        """
        callee_name: str | None = None
        object_name: str | None = None

        func_node = call_node.child_by_field_name("function")
        if not func_node:
            return callee_name, object_name

        if func_node.type == "identifier" and func_node.text:
            callee_name = func_node.text.decode("utf-8")
        elif func_node.type == "field_expression":
            value = func_node.child_by_field_name("value")
            field = func_node.child_by_field_name("field")
            if value and value.text:
                object_name = value.text.decode("utf-8")
            if field and field.text:
                callee_name = field.text.decode("utf-8")
        elif func_node.type == "scoped_identifier":
            name = func_node.child_by_field_name("name")
            if name and name.text:
                callee_name = name.text.decode("utf-8")

        return callee_name, object_name
@@ -0,0 +1,15 @@
1
+ """Type definitions for the extractors module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import StrEnum
6
+
7
+
8
+ class SupportedLanguage(StrEnum):
9
+ """Supported programming languages for AST extraction."""
10
+
11
+ PYTHON = "python"
12
+ JAVASCRIPT = "javascript"
13
+ TYPESCRIPT = "typescript"
14
+ GO = "go"
15
+ RUST = "rust"
@@ -0,0 +1,7 @@
1
+ """TypeScript language extractor."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from .extractor import TypeScriptExtractor
6
+
7
+ __all__ = ["TypeScriptExtractor"]
@@ -0,0 +1,92 @@
1
+ """TypeScript language extractor implementation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from shotgun.codebase.core.extractors.javascript.extractor import JavaScriptExtractor
8
+ from shotgun.codebase.core.extractors.types import SupportedLanguage
9
+
10
+ if TYPE_CHECKING:
11
+ from tree_sitter import Node
12
+
13
+
14
class TypeScriptExtractor(JavaScriptExtractor):
    """Extractor for TypeScript source code.

    Builds on JavaScriptExtractor and overrides the pieces where
    TypeScript differs: extra definition node types, decorators, and
    extends/implements clauses.
    """

    @property
    def language(self) -> SupportedLanguage:
        """The language this extractor handles."""
        return SupportedLanguage.TYPESCRIPT

    def _class_definition_types(self) -> list[str]:
        """Return node types that represent class definitions.

        Includes interface_declaration and type_alias_declaration in
        addition to the class node types.
        """
        return [
            "class_declaration",
            "class",
            "interface_declaration",
            "type_alias_declaration",
        ]

    def extract_decorators(self, node: Node) -> list[str]:
        """Extract decorators from a function or class node.

        Handles the bare form (``@name``) and the call form
        (``@name(...)``); in both cases the identifier is recorded.

        Args:
            node: The AST node

        Returns:
            List of decorator names
        """
        names: list[str] = []

        for child in node.children:
            if child.type != "decorator":
                continue
            for part in child.children:
                if part.type == "identifier" and part.text:
                    names.append(part.text.decode("utf-8"))
                    break
                if part.type == "call_expression":
                    # Take the first identifier inside the call.
                    for callee in part.children:
                        if callee.type == "identifier" and callee.text:
                            names.append(callee.text.decode("utf-8"))
                            break

        return names

    def extract_inheritance(self, class_node: Node) -> list[str]:
        """Extract parent class/interface names from a class or interface.

        Names are read from extends_clause and implements_clause children.
        When none are found, the JavaScript implementation is used instead.

        Args:
            class_node: The class/interface definition AST node

        Returns:
            List of parent names
        """
        clause_types = ("extends_clause", "implements_clause")
        parents: list[str] = []

        for clause in class_node.children:
            if clause.type not in clause_types:
                continue
            for type_node in clause.children:
                if type_node.type == "type_identifier" and type_node.text:
                    parents.append(type_node.text.decode("utf-8"))
                elif type_node.type == "generic_type":
                    # For `Base<T>` keep only the base name.
                    name_node = type_node.child_by_field_name("name")
                    if name_node and name_node.text:
                        parents.append(name_node.text.decode("utf-8"))

        if parents:
            return parents
        return super().extract_inheritance(class_node)
@@ -0,0 +1,252 @@
1
+ """Gitignore pattern matching for codebase indexing.
2
+
3
+ This module provides functionality to read and apply .gitignore rules
4
+ to filter files during indexing, significantly improving performance
5
+ for large codebases that may contain ignored directories like venv,
6
+ node_modules, build artifacts, etc.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from pathlib import Path
12
+ from typing import TYPE_CHECKING
13
+
14
+ import pathspec
15
+
16
+ from shotgun.codebase.models import GitignoreStats
17
+ from shotgun.logging_config import get_logger
18
+
19
+ if TYPE_CHECKING:
20
+ from pathspec import PathSpec
21
+
22
+ logger = get_logger(__name__)
23
+
24
+
25
class GitignoreManager:
    """Manages gitignore patterns for a repository.

    This class loads and caches gitignore patterns from:
    1. The repository's .gitignore file
    2. Any .gitignore files in subdirectories (hierarchical gitignore)

    Usage:
        manager = GitignoreManager(repo_path)
        if manager.is_ignored("path/to/file.py"):
            # Skip this file
            pass
    """

    def __init__(
        self,
        repo_path: Path,
        load_nested: bool = True,
        respect_gitignore: bool = True,
    ) -> None:
        """Initialize the gitignore manager.

        Args:
            repo_path: Root path of the repository
            load_nested: Whether to load .gitignore files from subdirectories
            respect_gitignore: Whether to respect .gitignore at all (if False, nothing is ignored)
        """
        self.repo_path = repo_path.resolve()
        self.load_nested = load_nested
        self.respect_gitignore = respect_gitignore

        # Cache for PathSpec objects by directory
        self._specs: dict[Path, PathSpec] = {}

        # Combined spec for the root gitignore
        self._root_spec: PathSpec | None = None

        # Statistics for debugging
        self.stats = GitignoreStats()

        if respect_gitignore:
            self._load_gitignore_files()

    def _load_gitignore_files(self) -> None:
        """Load the root .gitignore and, if enabled, every nested one.

        Nested files are discovered with a recursive glob over the whole
        repository tree and stored keyed by their containing directory.
        """
        root_gitignore = self.repo_path / ".gitignore"

        if root_gitignore.exists():
            self._root_spec = self._load_gitignore_file(root_gitignore)
            self.stats.gitignore_files_loaded += 1
            logger.debug(
                f"Loaded root .gitignore with patterns - "
                f"path: {root_gitignore}, patterns: {self.stats.patterns_loaded}"
            )

        if self.load_nested:
            # Find all nested .gitignore files
            for gitignore_path in self.repo_path.rglob(".gitignore"):
                if gitignore_path.parent == self.repo_path:
                    continue  # Skip root, already loaded

                spec = self._load_gitignore_file(gitignore_path)
                if spec:
                    self._specs[gitignore_path.parent] = spec
                    self.stats.gitignore_files_loaded += 1

            if self._specs:
                logger.debug(f"Loaded {len(self._specs)} nested .gitignore files")

    def _load_gitignore_file(self, gitignore_path: Path) -> PathSpec | None:
        """Load patterns from a single gitignore file.

        Blank lines and ``#`` comment lines are dropped before compiling.
        Loading is best-effort: any failure is logged and reported as None.

        Args:
            gitignore_path: Path to the .gitignore file

        Returns:
            PathSpec object or None if file couldn't be loaded
        """
        try:
            with open(gitignore_path, encoding="utf-8", errors="ignore") as f:
                patterns = f.read().splitlines()

            # Filter out empty lines and comments
            valid_patterns = [
                p.strip()
                for p in patterns
                if p.strip() and not p.strip().startswith("#")
            ]

            if not valid_patterns:
                return None

            self.stats.patterns_loaded += len(valid_patterns)

            return pathspec.PathSpec.from_lines("gitwildmatch", valid_patterns)
        except Exception as e:
            logger.warning(f"Failed to load gitignore: {gitignore_path}, error: {e}")
            return None

    def is_ignored(self, path: str | Path) -> bool:
        """Check if a path should be ignored based on gitignore rules.

        The root .gitignore is consulted first, then every nested
        .gitignore in a directory between the path and the repository root.

        Args:
            path: Path to check (relative to repo root or absolute)

        Returns:
            True if the path should be ignored
        """
        if not self.respect_gitignore:
            return False

        self.stats.files_checked += 1

        # Convert to Path and make relative to repo root
        path_obj = Path(path) if isinstance(path, str) else path

        if path_obj.is_absolute():
            # Resolve to handle symlinks (e.g., /var -> /private/var on macOS)
            try:
                resolved_path = path_obj.resolve()
                path_obj = resolved_path.relative_to(self.repo_path)
            except ValueError:
                # Path is not under repo_path
                return False

        # Convert to string with forward slashes for consistency
        path_str = str(path_obj).replace("\\", "/")

        # Check root gitignore first
        if self._root_spec and self._root_spec.match_file(path_str):
            self.stats.files_ignored += 1
            return True

        # Check nested gitignore files
        if self.load_nested:
            # Walk the ancestors explicitly and stop at the repo root. An
            # ordering comparison between paths never terminates when
            # repo_path is a filesystem root, because the parent of the
            # root is the root itself.
            start_dir = (self.repo_path / path_obj).parent
            for current_dir in (start_dir, *start_dir.parents):
                spec = self._specs.get(current_dir)
                if spec is not None:
                    # Make path relative to this gitignore's directory
                    try:
                        rel_path = path_obj.relative_to(
                            current_dir.relative_to(self.repo_path)
                        )
                        rel_path_str = str(rel_path).replace("\\", "/")
                        if spec.match_file(rel_path_str):
                            self.stats.files_ignored += 1
                            return True
                    except ValueError:
                        pass
                if current_dir == self.repo_path:
                    break

        return False

    def is_directory_ignored(self, path: str | Path) -> bool:
        """Check if a directory should be ignored.

        For directories, we add a trailing slash to match gitignore semantics.
        Only the root .gitignore is consulted, and statistics are not updated.

        Args:
            path: Directory path to check

        Returns:
            True if the directory should be ignored
        """
        if not self.respect_gitignore:
            return False

        # Convert to Path and make relative to repo root
        path_obj = Path(path) if isinstance(path, str) else path

        if path_obj.is_absolute():
            try:
                path_obj = path_obj.relative_to(self.repo_path)
            except ValueError:
                # repo_path was resolved in __init__, so an unresolved
                # spelling of the same directory (e.g. /var vs /private/var
                # on macOS) fails above; retry with symlinks resolved, as
                # is_ignored does.
                try:
                    path_obj = path_obj.resolve().relative_to(self.repo_path)
                except ValueError:
                    return False

        # Check both with and without trailing slash
        path_str = str(path_obj).replace("\\", "/")
        path_str_dir = path_str.rstrip("/") + "/"

        # Check root gitignore
        if self._root_spec:
            if self._root_spec.match_file(path_str) or self._root_spec.match_file(
                path_str_dir
            ):
                logger.debug(f"Directory ignored by root .gitignore: {path_str}")
                return True

        return False

    def filter_paths(self, paths: list[Path]) -> list[Path]:
        """Filter a list of paths, removing ignored ones.

        Args:
            paths: List of paths to filter

        Returns:
            List of paths that are not ignored
        """
        return [p for p in paths if not self.is_ignored(p)]

    def get_stats_summary(self) -> str:
        """Get a summary of gitignore statistics.

        Returns:
            Human-readable statistics string
        """
        return (
            f"Gitignore stats: "
            f"{self.stats.gitignore_files_loaded} files loaded, "
            f"{self.stats.patterns_loaded} patterns, "
            f"{self.stats.files_checked} paths checked, "
            f"{self.stats.files_ignored} ignored"
        )
241
+
242
+
243
def load_gitignore_for_repo(repo_path: Path | str) -> GitignoreManager:
    """Build a GitignoreManager for a repository using default options.

    Args:
        repo_path: Repository root, given as a string or a Path

    Returns:
        A GitignoreManager constructed for that root
    """
    root = Path(repo_path)
    manager = GitignoreManager(root)
    return manager