intentgraph 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- intentgraph/__init__.py +14 -0
- intentgraph/adapters/__init__.py +1 -0
- intentgraph/adapters/git.py +124 -0
- intentgraph/adapters/output.py +122 -0
- intentgraph/adapters/parsers/__init__.py +39 -0
- intentgraph/adapters/parsers/base.py +135 -0
- intentgraph/adapters/parsers/enhanced_python_parser.py +367 -0
- intentgraph/adapters/parsers/go_parser.py +121 -0
- intentgraph/adapters/parsers/javascript_parser.py +100 -0
- intentgraph/adapters/parsers/python_parser.py +118 -0
- intentgraph/adapters/parsers/typescript_parser.py +102 -0
- intentgraph/application/__init__.py +1 -0
- intentgraph/application/analyzer.py +345 -0
- intentgraph/cli.py +201 -0
- intentgraph/domain/__init__.py +1 -0
- intentgraph/domain/exceptions.py +25 -0
- intentgraph/domain/graph.py +66 -0
- intentgraph/domain/models.py +155 -0
- intentgraph-0.1.0.dist-info/METADATA +406 -0
- intentgraph-0.1.0.dist-info/RECORD +24 -0
- intentgraph-0.1.0.dist-info/WHEEL +4 -0
- intentgraph-0.1.0.dist-info/entry_points.txt +2 -0
- intentgraph-0.1.0.dist-info/licenses/LICENSE +21 -0
- intentgraph-0.1.0.dist-info/licenses/NOTICE +10 -0
intentgraph/__init__.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""IntentGraph - A best-in-class repository dependency analyzer."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
__author__ = "Your Name"
|
|
5
|
+
__email__ = "your.email@example.com"
|
|
6
|
+
|
|
7
|
+
from .domain.models import AnalysisResult, FileInfo, Language, LanguageSummary
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"AnalysisResult",
|
|
11
|
+
"FileInfo",
|
|
12
|
+
"Language",
|
|
13
|
+
"LanguageSummary",
|
|
14
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Infrastructure adapters for IntentGraph."""
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""Git integration and gitignore handling."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import pathspec
|
|
7
|
+
from git import Repo
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class GitIgnoreHandler:
    """Handles .gitignore file parsing and matching."""

    # Patterns ignored even without a .gitignore entry.
    _DEFAULT_PATTERNS = (
        ".git/",
        ".git/**",
        "__pycache__/",
        "*.pyc",
        "*.pyo",
        "*.pyd",
        ".Python",
        "build/",
        "develop-eggs/",
        "dist/",
        "downloads/",
        "eggs/",
        ".eggs/",
        "lib/",
        "lib64/",
        "parts/",
        "sdist/",
        "var/",
        "wheels/",
        "*.egg-info/",
        ".installed.cfg",
        "*.egg",
        "node_modules/",
        ".env",
        ".venv/",
        "env/",
        "venv/",
        "ENV/",
        "env.bak/",
        "venv.bak/",
    )

    def __init__(self) -> None:
        # Compiled matcher; None until load_gitignore() has run.
        self._spec: pathspec.PathSpec | None = None
        # Repository root the matcher was built for.
        self._repo_path: Path | None = None

    def load_gitignore(self, repo_path: Path) -> None:
        """Load .gitignore patterns from the repository at *repo_path*.

        Collects the root .gitignore, every nested .gitignore (with its
        patterns re-anchored to the containing directory), plus a set of
        common default ignore patterns, and compiles them into a single
        pathspec matcher. Failures are logged and degrade to an empty
        matcher rather than raising.
        """
        self._repo_path = repo_path
        patterns: list[str] = []

        try:
            # Root .gitignore patterns apply as-is; pathspec's gitwildmatch
            # already skips blank and comment lines.
            root_gitignore = repo_path / ".gitignore"
            if root_gitignore.exists():
                patterns.extend(root_gitignore.read_text(encoding="utf-8").splitlines())

            # Nested .gitignore files: re-anchor each pattern to its directory.
            for gitignore_file in repo_path.rglob(".gitignore"):
                if gitignore_file == root_gitignore:
                    continue
                try:
                    relative_dir = gitignore_file.parent.relative_to(repo_path)
                    for pattern in gitignore_file.read_text(encoding="utf-8").splitlines():
                        patterns.extend(self._rebase_pattern(pattern, relative_dir))
                except Exception as e:
                    logger.warning(f"Failed to load {gitignore_file}: {e}")

            patterns.extend(self._DEFAULT_PATTERNS)
            self._spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)

        except Exception as e:
            logger.warning(f"Failed to load .gitignore: {e}")
            self._spec = pathspec.PathSpec.from_lines("gitwildmatch", [])

    @staticmethod
    def _rebase_pattern(pattern: str, relative_dir: Path) -> list[str]:
        """Re-anchor one nested .gitignore *pattern* under *relative_dir*.

        Returns a list (possibly empty) so blank and comment lines can be
        dropped. BUG FIX vs. the previous version: a negation prefix ("!")
        is kept in front of the rebased path instead of being embedded in
        the middle of it, and string joining preserves a trailing "/"
        (directory-only patterns) that Path arithmetic used to drop.
        """
        text = pattern.strip()
        if not text or text.startswith("#"):
            return []
        negated = text.startswith("!")
        if negated:
            text = text[1:]
        if text.startswith("/"):
            # Pattern anchored to the nested directory itself.
            text = text[1:]
        rebased = f"{relative_dir.as_posix()}/{text}"
        return [f"!{rebased}" if negated else rebased]

    def is_ignored(self, file_path: Path, repo_path: Path) -> bool:
        """Check if *file_path* should be ignored according to .gitignore.

        Files outside the repository are treated as ignored; before
        load_gitignore() has run, nothing is ignored.
        """
        if self._spec is None or self._repo_path is None:
            return False

        try:
            # Match against the path relative to the repository root.
            relative_path = file_path.relative_to(repo_path)
            return self._spec.match_file(str(relative_path))
        except ValueError:
            # File is outside the repository.
            return True
        except Exception as e:
            logger.warning(f"Error checking ignore status for {file_path}: {e}")
            return False

    def get_tracked_files(self, repo_path: Path) -> list[Path]:
        """Get the list of Git-tracked files that still exist on disk.

        Returns an empty list (with a logged warning) if *repo_path* is not
        a usable Git repository.
        """
        try:
            repo = Repo(repo_path)
            # Index entries are keyed by (path, stage) tuples.
            candidates = [repo_path / entry[0] for entry in repo.index.entries]
            return [p for p in candidates if p.exists()]
        except Exception as e:
            logger.warning(f"Failed to get tracked files: {e}")
            return []
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
"""Output formatting and validation."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import logging
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from types import MappingProxyType
|
|
7
|
+
from typing import Any
|
|
8
|
+
from uuid import UUID
|
|
9
|
+
|
|
10
|
+
import orjson
|
|
11
|
+
|
|
12
|
+
from ..domain.models import AnalysisResult
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class OutputFormatter:
    """Formats and validates analysis results for output."""

    def __init__(self, schema_path: Path | None = None):
        """Optionally load a JSON schema from *schema_path* for validation."""
        self.schema_path = schema_path
        self._schema: dict[str, Any] | None = None

        if schema_path and schema_path.exists():
            try:
                self._schema = json.loads(schema_path.read_text())
            except Exception as e:
                logger.warning(f"Failed to load schema from {schema_path}: {e}")

    def format_json(self, result: AnalysisResult, pretty: bool = True) -> str:
        """Format *result* as a JSON string.

        When *pretty* is True the output is 2-space indented with sorted
        keys. Raises on serialization failure (after logging).
        """
        try:
            # Serialize the pydantic model, then normalize non-JSON types.
            result_dict = result.dict()
            converted_dict = self._convert_types(result_dict)

            if pretty:
                return orjson.dumps(
                    converted_dict,
                    option=orjson.OPT_INDENT_2 | orjson.OPT_SORT_KEYS
                ).decode('utf-8')
            return orjson.dumps(converted_dict).decode('utf-8')

        except Exception as e:
            logger.error(f"Failed to format JSON output: {e}")
            raise

    def _convert_types(self, obj: Any, seen: set[int] | None = None) -> Any:
        """Recursively convert *obj* into JSON-serializable primitives.

        *seen* tracks object ids along the current recursion path so that
        self-referential structures degrade to their str() form instead of
        recursing forever.
        """
        if seen is None:
            seen = set()

        obj_id = id(obj)
        if obj_id in seen:
            return str(obj)  # Cycle detected - fall back to text.

        if isinstance(obj, (dict, MappingProxyType)):
            seen.add(obj_id)
            try:
                return {str(key): self._convert_types(value, seen) for key, value in obj.items()}
            finally:
                seen.remove(obj_id)
        elif isinstance(obj, (list, tuple, set, frozenset)):
            # BUG FIX: tuples/sets previously leaked through unchanged and
            # orjson rejects tuples (sets degraded to str()); emit lists.
            return [self._convert_types(item, seen) for item in obj]
        elif isinstance(obj, (Path, UUID)):
            return str(obj)
        elif hasattr(obj, '__dict__') and not isinstance(obj, type(lambda: None)):
            # Skip function types and Mock objects (test doubles).
            if 'Mock' in str(type(obj)):
                return str(obj)
            seen.add(obj_id)
            try:
                return self._convert_types(obj.__dict__, seen)
            finally:
                seen.remove(obj_id)
        else:
            # Anything else: keep it if the stdlib encoder accepts it,
            # otherwise fall back to its string representation.
            try:
                json.dumps(obj)
                return obj
            except (TypeError, ValueError):
                return str(obj)

    def validate_against_schema(self, result: AnalysisResult) -> bool:
        """Validate *result* against the loaded JSON schema.

        Returns True when validation passes, or when no schema / no
        jsonschema library is available (validation is best-effort).
        """
        if not self._schema:
            logger.warning("No schema available for validation")
            return True

        try:
            import jsonschema

            result_dict = result.dict()
            converted_dict = self._convert_types(result_dict)
            jsonschema.validate(converted_dict, self._schema)
            return True

        except ImportError:
            logger.warning("jsonschema not available, skipping validation")
            return True
        except jsonschema.ValidationError as e:
            logger.error(f"Schema validation failed: {e}")
            return False
        except Exception as e:
            logger.error(f"Validation error: {e}")
            return False

    def export_to_file(self, result: AnalysisResult, output_path: Path, pretty: bool = True) -> None:
        """Serialize *result* and write it to *output_path* as UTF-8 JSON."""
        try:
            json_output = self.format_json(result, pretty)
            output_path.write_text(json_output, encoding='utf-8')
        except Exception as e:
            logger.error(f"Failed to export to {output_path}: {e}")
            raise
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""Language parsers for dependency extraction."""
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from ...domain.models import Language
|
|
6
|
+
from .base import LanguageParser
|
|
7
|
+
from .go_parser import GoParser
|
|
8
|
+
from .javascript_parser import JavaScriptParser
|
|
9
|
+
from .python_parser import PythonParser
|
|
10
|
+
from .typescript_parser import TypeScriptParser
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class _ParserRegistry:
    """Maps each supported Language to a shared parser instance."""

    def __init__(self):
        # One eagerly-constructed parser per supported language.
        parser_classes = {
            Language.PYTHON: PythonParser,
            Language.JAVASCRIPT: JavaScriptParser,
            Language.TYPESCRIPT: TypeScriptParser,
            Language.GO: GoParser,
        }
        self._parsers = {lang: cls() for lang, cls in parser_classes.items()}

    def get_parser(self, language: Language) -> LanguageParser | None:
        """Return the parser registered for *language*, or None."""
        return self._parsers.get(language)


# Module-level singleton shared by all callers.
_registry = _ParserRegistry()


def get_parser_for_language(language: Language) -> LanguageParser | None:
    """Get appropriate parser for a language."""
    return _registry.get_parser(language)


__all__ = [
    "GoParser",
    "JavaScriptParser",
    "LanguageParser",
    "PythonParser",
    "TypeScriptParser",
    "get_parser_for_language",
]
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Base class for language parsers."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Optional

from ...domain.models import APIExport, CodeSymbol, FunctionDependency
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class LanguageParser(ABC):
    """Abstract base class for language-specific parsers."""

    @abstractmethod
    def extract_dependencies(self, file_path: Path, repo_path: Path) -> list[str]:
        """Extract dependencies from a source file.

        Args:
            file_path: Path to the source file
            repo_path: Path to the repository root

        Returns:
            List of relative paths to dependencies within the repository
        """

    def extract_code_structure(self, file_path: Path, repo_path: Path) -> tuple[
        list[CodeSymbol],
        list[APIExport],
        list[FunctionDependency],
        list[str],       # imports
        dict[str, Any],  # metadata (complexity, etc.) - was dict[str, any], the builtin
    ]:
        """Extract detailed code structure information.

        Args:
            file_path: Path to the source file
            repo_path: Path to the repository root

        Returns:
            Tuple of (symbols, exports, function_deps, imports, metadata)
        """
        # Default implementation returns empty data; concrete parsers override.
        return [], [], [], [], {}

    def _resolve_import_path(self, import_path: str, file_path: Path, repo_path: Path) -> list[str]:
        """Resolve *import_path* to repo-relative file paths with security validation.

        Handles dotted relative imports per Python semantics: one leading
        dot anchors at the importing file's directory, and each additional
        dot climbs one directory up (BUG FIX: the old code stripped all
        leading dots, treating ".." the same as "."). Any path that
        resolves outside the repository is rejected.
        """
        resolved_paths: list[str] = []

        # Validate import_path format first (null bytes, length, traversal).
        if not self._is_valid_import_path(import_path):
            return []

        if import_path.startswith('.'):
            # Relative import: climb one parent per dot beyond the first.
            dots = len(import_path) - len(import_path.lstrip('.'))
            base_dir = file_path.parent
            for _ in range(dots - 1):
                base_dir = base_dir.parent
            remainder = import_path[dots:]
            target_path = base_dir / remainder.replace('.', '/') if remainder else base_dir
        else:
            # Absolute import resolved from the repository root.
            target_path = repo_path / import_path.replace('.', '/')

        try:
            resolved_target = target_path.resolve()
            repo_resolved = repo_path.resolve()

            # Ensure the target stays within repository boundaries
            # (raises ValueError on escape, including via "..").
            resolved_target.relative_to(repo_resolved)

            # Try each language-specific file extension.
            for ext in self._get_file_extensions():
                candidate = resolved_target.with_suffix(ext)
                if candidate.exists() and candidate.is_file():
                    try:
                        resolved_paths.append(str(candidate.relative_to(repo_resolved)))
                    except ValueError:
                        # File is outside repository - skip.
                        continue
        except (ValueError, OSError):
            # Path traversal attempt or invalid path.
            return []

        # Try a directory with a language-specific __init__-style file.
        try:
            if resolved_target.is_dir():
                for init_name in self._get_init_files():
                    init_file = resolved_target / init_name
                    if init_file.exists():
                        try:
                            resolved_paths.append(str(init_file.relative_to(repo_resolved)))
                        except ValueError:
                            # File is outside repository - skip.
                            continue
        except (ValueError, OSError):
            # Invalid path - directory probe is best-effort.
            pass

        return resolved_paths

    def _is_valid_import_path(self, import_path: str) -> bool:
        """Validate import path format to prevent injection.

        Rejects null bytes / control characters (tab and newline excepted),
        over-long paths, excessive relative traversal, and blank input.
        """
        if '\x00' in import_path or any(ord(c) < 32 for c in import_path if c not in '\t\n'):
            return False

        if len(import_path) > 1000:  # Reasonable length limit.
            return False

        if import_path.count('..') > 10:  # Reasonable traversal limit.
            return False

        if not import_path.strip():
            return False

        return True

    @abstractmethod
    def _get_file_extensions(self) -> list[str]:
        """Get file extensions for this language (e.g. ['.py'])."""

    @abstractmethod
    def _get_init_files(self) -> list[str]:
        """Get initialization file names for this language (e.g. ['__init__.py'])."""
|