intentgraph 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,14 @@
1
"""IntentGraph - A best-in-class repository dependency analyzer."""

# Package version; keep in sync with the build metadata (pyproject/setup).
__version__ = "0.1.0"
# NOTE(review): author/email look like template placeholders — fill in the
# real maintainer details before publishing.
__author__ = "Your Name"
__email__ = "your.email@example.com"

from .domain.models import AnalysisResult, FileInfo, Language, LanguageSummary

# Public API: the core domain models re-exported at package top level.
__all__ = [
    "AnalysisResult",
    "FileInfo",
    "Language",
    "LanguageSummary",
]
@@ -0,0 +1 @@
1
+ """Infrastructure adapters for IntentGraph."""
@@ -0,0 +1,124 @@
1
+ """Git integration and gitignore handling."""
2
+
3
+ import logging
4
+ from pathlib import Path
5
+
6
+ import pathspec
7
+ from git import Repo
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class GitIgnoreHandler:
13
+ """Handles .gitignore file parsing and matching."""
14
+
15
+ def __init__(self) -> None:
16
+ self._spec: pathspec.PathSpec | None = None
17
+ self._repo_path: Path | None = None
18
+
19
+ def load_gitignore(self, repo_path: Path) -> None:
20
+ """Load .gitignore patterns from repository."""
21
+ self._repo_path = repo_path
22
+ patterns = []
23
+
24
+ try:
25
+ # Load .gitignore from root
26
+ gitignore_path = repo_path / ".gitignore"
27
+ if gitignore_path.exists():
28
+ patterns.extend(gitignore_path.read_text(encoding="utf-8").splitlines())
29
+
30
+ # Load nested .gitignore files
31
+ for gitignore_file in repo_path.rglob(".gitignore"):
32
+ if gitignore_file != gitignore_path:
33
+ try:
34
+ relative_dir = gitignore_file.parent.relative_to(repo_path)
35
+ nested_patterns = gitignore_file.read_text(encoding="utf-8").splitlines()
36
+
37
+ # Prefix patterns with relative directory
38
+ for pattern in nested_patterns:
39
+ if pattern.strip() and not pattern.startswith("#"):
40
+ if pattern.startswith("/"):
41
+ # Absolute pattern within the nested directory
42
+ patterns.append(str(relative_dir / pattern[1:]))
43
+ else:
44
+ # Relative pattern
45
+ patterns.append(str(relative_dir / pattern))
46
+ except Exception as e:
47
+ logger.warning(f"Failed to load {gitignore_file}: {e}")
48
+
49
+ # Add common patterns
50
+ patterns.extend([
51
+ ".git/",
52
+ ".git/**",
53
+ "__pycache__/",
54
+ "*.pyc",
55
+ "*.pyo",
56
+ "*.pyd",
57
+ ".Python",
58
+ "build/",
59
+ "develop-eggs/",
60
+ "dist/",
61
+ "downloads/",
62
+ "eggs/",
63
+ ".eggs/",
64
+ "lib/",
65
+ "lib64/",
66
+ "parts/",
67
+ "sdist/",
68
+ "var/",
69
+ "wheels/",
70
+ "*.egg-info/",
71
+ ".installed.cfg",
72
+ "*.egg",
73
+ "node_modules/",
74
+ ".env",
75
+ ".venv/",
76
+ "env/",
77
+ "venv/",
78
+ "ENV/",
79
+ "env.bak/",
80
+ "venv.bak/",
81
+ ])
82
+
83
+ # Create pathspec
84
+ self._spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
85
+
86
+ except Exception as e:
87
+ logger.warning(f"Failed to load .gitignore: {e}")
88
+ self._spec = pathspec.PathSpec.from_lines("gitwildmatch", [])
89
+
90
+ def is_ignored(self, file_path: Path, repo_path: Path) -> bool:
91
+ """Check if file should be ignored according to .gitignore."""
92
+ if not self._spec or not self._repo_path:
93
+ return False
94
+
95
+ try:
96
+ # Get relative path from repository root
97
+ relative_path = file_path.relative_to(repo_path)
98
+
99
+ # Check if file matches any ignore pattern
100
+ return self._spec.match_file(str(relative_path))
101
+
102
+ except ValueError:
103
+ # File is outside repository
104
+ return True
105
+ except Exception as e:
106
+ logger.warning(f"Error checking ignore status for {file_path}: {e}")
107
+ return False
108
+
109
+ def get_tracked_files(self, repo_path: Path) -> list[Path]:
110
+ """Get list of files tracked by Git."""
111
+ try:
112
+ repo = Repo(repo_path)
113
+ tracked_files = []
114
+
115
+ for item in repo.index.entries:
116
+ file_path = repo_path / item[0]
117
+ if file_path.exists():
118
+ tracked_files.append(file_path)
119
+
120
+ return tracked_files
121
+
122
+ except Exception as e:
123
+ logger.warning(f"Failed to get tracked files: {e}")
124
+ return []
@@ -0,0 +1,122 @@
1
+ """Output formatting and validation."""
2
+
3
+ import json
4
+ import logging
5
+ from pathlib import Path
6
+ from types import MappingProxyType
7
+ from typing import Any
8
+ from uuid import UUID
9
+
10
+ import orjson
11
+
12
+ from ..domain.models import AnalysisResult
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ class OutputFormatter:
18
+ """Formats and validates analysis results for output."""
19
+
20
+ def __init__(self, schema_path: Path | None = None):
21
+ self.schema_path = schema_path
22
+ self._schema: dict[str, Any] | None = None
23
+
24
+ if schema_path and schema_path.exists():
25
+ try:
26
+ self._schema = json.loads(schema_path.read_text())
27
+ except Exception as e:
28
+ logger.warning(f"Failed to load schema from {schema_path}: {e}")
29
+
30
+ def format_json(self, result: AnalysisResult, pretty: bool = True) -> str:
31
+ """Format analysis result as JSON."""
32
+ try:
33
+ # Validate with pydantic
34
+ result_dict = result.dict()
35
+
36
+ # Convert paths and UUIDs to strings
37
+ converted_dict = self._convert_types(result_dict)
38
+
39
+ # Format with orjson
40
+ if pretty:
41
+ return orjson.dumps(
42
+ converted_dict,
43
+ option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS
44
+ ).decode('utf-8')
45
+ else:
46
+ return orjson.dumps(converted_dict).decode('utf-8')
47
+
48
+ except Exception as e:
49
+ logger.error(f"Failed to format JSON output: {e}")
50
+ raise
51
+
52
+ def _convert_types(self, obj: Any, seen: set = None) -> Any:
53
+ """Convert complex types to JSON-serializable formats."""
54
+ if seen is None:
55
+ seen = set()
56
+
57
+ # Prevent recursion by tracking object ids
58
+ obj_id = id(obj)
59
+ if obj_id in seen:
60
+ return str(obj) # Fallback to string representation
61
+
62
+ if isinstance(obj, (dict, MappingProxyType)):
63
+ seen.add(obj_id)
64
+ try:
65
+ return {str(key): self._convert_types(value, seen) for key, value in obj.items()}
66
+ finally:
67
+ seen.remove(obj_id)
68
+ elif isinstance(obj, list):
69
+ return [self._convert_types(item, seen) for item in obj]
70
+ elif isinstance(obj, Path) or isinstance(obj, UUID):
71
+ return str(obj)
72
+ elif hasattr(obj, '__dict__') and not isinstance(obj, type(lambda: None)):
73
+ # Skip function types and Mock objects
74
+ if 'Mock' in str(type(obj)):
75
+ return str(obj)
76
+ seen.add(obj_id)
77
+ try:
78
+ return self._convert_types(obj.__dict__, seen)
79
+ finally:
80
+ seen.remove(obj_id)
81
+ else:
82
+ # Handle any other complex types by converting to string
83
+ try:
84
+ # Test if it's JSON serializable
85
+ json.dumps(obj)
86
+ return obj
87
+ except (TypeError, ValueError):
88
+ return str(obj)
89
+
90
+ def validate_against_schema(self, result: AnalysisResult) -> bool:
91
+ """Validate result against JSON schema."""
92
+ if not self._schema:
93
+ logger.warning("No schema available for validation")
94
+ return True
95
+
96
+ try:
97
+ import jsonschema
98
+
99
+ result_dict = result.dict()
100
+ converted_dict = self._convert_types(result_dict)
101
+
102
+ jsonschema.validate(converted_dict, self._schema)
103
+ return True
104
+
105
+ except ImportError:
106
+ logger.warning("jsonschema not available, skipping validation")
107
+ return True
108
+ except jsonschema.ValidationError as e:
109
+ logger.error(f"Schema validation failed: {e}")
110
+ return False
111
+ except Exception as e:
112
+ logger.error(f"Validation error: {e}")
113
+ return False
114
+
115
+ def export_to_file(self, result: AnalysisResult, output_path: Path, pretty: bool = True) -> None:
116
+ """Export result to file."""
117
+ try:
118
+ json_output = self.format_json(result, pretty)
119
+ output_path.write_text(json_output, encoding='utf-8')
120
+ except Exception as e:
121
+ logger.error(f"Failed to export to {output_path}: {e}")
122
+ raise
@@ -0,0 +1,39 @@
1
+ """Language parsers for dependency extraction."""
2
+
3
+ from typing import Optional
4
+
5
+ from ...domain.models import Language
6
+ from .base import LanguageParser
7
+ from .go_parser import GoParser
8
+ from .javascript_parser import JavaScriptParser
9
+ from .python_parser import PythonParser
10
+ from .typescript_parser import TypeScriptParser
11
+
12
+
13
class _ParserRegistry:
    """Maps each supported Language to a shared parser instance."""

    def __init__(self):
        # Eagerly instantiate one parser per supported language.
        self._parsers = {}
        for language, parser_cls in (
            (Language.PYTHON, PythonParser),
            (Language.JAVASCRIPT, JavaScriptParser),
            (Language.TYPESCRIPT, TypeScriptParser),
            (Language.GO, GoParser),
        ):
            self._parsers[language] = parser_cls()

    def get_parser(self, language: Language) -> LanguageParser | None:
        """Return the parser registered for *language*, or None."""
        return self._parsers.get(language)
24
+
25
# Module-level singleton registry shared by all callers in this process.
_registry = _ParserRegistry()


def get_parser_for_language(language: Language) -> LanguageParser | None:
    """Get appropriate parser for a language.

    Returns None when no parser is registered for *language*.
    """
    return _registry.get_parser(language)


# Public API of the parsers subpackage.
__all__ = [
    "GoParser",
    "JavaScriptParser",
    "LanguageParser",
    "PythonParser",
    "TypeScriptParser",
    "get_parser_for_language",
]
@@ -0,0 +1,135 @@
1
+ """Base class for language parsers."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ from ...domain.models import CodeSymbol, APIExport, FunctionDependency
8
+
9
+
10
class LanguageParser(ABC):
    """Abstract base class for language-specific parsers."""

    @abstractmethod
    def extract_dependencies(self, file_path: Path, repo_path: Path) -> list[str]:
        """Extract dependencies from a source file.

        Args:
            file_path: Path to the source file
            repo_path: Path to the repository root

        Returns:
            List of relative paths to dependencies within the repository
        """

    def extract_code_structure(self, file_path: Path, repo_path: Path) -> tuple[
        list["CodeSymbol"],
        list["APIExport"],
        list["FunctionDependency"],
        list[str],          # imports
        dict[str, object],  # metadata (complexity, etc.)
    ]:
        """Extract detailed code structure information.

        Args:
            file_path: Path to the source file
            repo_path: Path to the repository root

        Returns:
            Tuple of (symbols, exports, function_deps, imports, metadata).
            The default implementation returns empty collections; parsers
            that support structural analysis override this.
        """
        return [], [], [], [], {}

    def _resolve_import_path(self, import_path: str, file_path: Path, repo_path: Path) -> list[str]:
        """Resolve an import string to repo-relative file paths.

        Handles absolute imports (resolved from the repository root) and
        relative imports, where one leading dot means the importing file's
        own package and each additional dot climbs one parent package
        (PEP 328 semantics).  Paths escaping the repository are rejected.

        Returns:
            Relative paths of matching files, possibly empty.
        """
        resolved_paths: list[str] = []

        # Reject malformed or hostile import strings up front.
        if not self._is_valid_import_path(import_path):
            return []

        if import_path.startswith('.'):
            # Count leading dots: ".mod" stays in the current directory,
            # "..mod" climbs one level, and so on.  (The previous code
            # stripped all dots and always resolved against the file's own
            # directory, breaking multi-level relative imports.)
            level = len(import_path) - len(import_path.lstrip('.'))
            base_dir = file_path.parent
            for _ in range(level - 1):
                base_dir = base_dir.parent
            remainder = import_path[level:]
            if remainder:
                target_path = base_dir / remainder.replace('.', '/')
            else:
                target_path = base_dir
        else:
            # Absolute import resolved from the repository root.
            target_path = repo_path / import_path.replace('.', '/')

        try:
            resolved_target = target_path.resolve()
            repo_resolved = repo_path.resolve()

            # Raises ValueError if the target escaped the repository.
            resolved_target.relative_to(repo_resolved)

            # Try each language extension as a file candidate.
            for ext in self._get_file_extensions():
                candidate = resolved_target.with_suffix(ext)
                if candidate.exists() and candidate.is_file():
                    try:
                        resolved_paths.append(str(candidate.relative_to(repo_resolved)))
                    except ValueError:
                        # File is outside repository - skip
                        continue

        except (ValueError, OSError):
            # Path traversal attempt or invalid path
            return []

        # A package import may resolve to a directory's init file.
        try:
            if resolved_target.is_dir():
                for init_name in self._get_init_files():
                    init_file = resolved_target / init_name
                    if init_file.exists():
                        try:
                            resolved_paths.append(str(init_file.relative_to(repo_resolved)))
                        except ValueError:
                            # File is outside repository - skip
                            continue
        except (ValueError, OSError):
            # Path traversal attempt or invalid path
            pass

        return resolved_paths

    def _is_valid_import_path(self, import_path: str) -> bool:
        """Validate import path format to prevent injection.

        Rejects control characters, absurd lengths, excessive ".."
        traversal, and empty/whitespace-only strings.
        """
        # Null bytes / control characters (tab and newline tolerated).
        if '\x00' in import_path or any(ord(c) < 32 for c in import_path if c not in '\t\n'):
            return False

        if len(import_path) > 1000:  # reasonable length cap
            return False

        if import_path.count('..') > 10:  # reasonable traversal cap
            return False

        if not import_path.strip():
            return False

        return True

    @abstractmethod
    def _get_file_extensions(self) -> list[str]:
        """Get file extensions (with leading dot) for this language."""

    @abstractmethod
    def _get_init_files(self) -> list[str]:
        """Get initialization file names (e.g. __init__.py) for this language."""