skylos 1.0.11__py3-none-any.whl → 1.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of skylos has been flagged as potentially problematic.
- skylos/__init__.py +1 -1
- skylos/analyzer.py +108 -9
- skylos/cli.py +63 -4
- skylos/visitor.py +5 -7
- {skylos-1.0.11.dist-info → skylos-1.1.11.dist-info}/METADATA +1 -1
- skylos-1.1.11.dist-info/RECORD +25 -0
- test/conftest.py +212 -0
- test/test_analyzer.py +584 -0
- test/test_cli.py +353 -0
- test/test_integration.py +320 -0
- test/test_visitor.py +516 -22
- skylos-1.0.11.dist-info/RECORD +0 -30
- test/pykomodo/__init__.py +0 -0
- test/pykomodo/command_line.py +0 -176
- test/pykomodo/config.py +0 -20
- test/pykomodo/core.py +0 -121
- test/pykomodo/dashboard.py +0 -608
- test/pykomodo/enhanced_chunker.py +0 -304
- test/pykomodo/multi_dirs_chunker.py +0 -783
- test/pykomodo/pykomodo_config.py +0 -68
- test/pykomodo/token_chunker.py +0 -470
- {skylos-1.0.11.dist-info → skylos-1.1.11.dist-info}/WHEEL +0 -0
- {skylos-1.0.11.dist-info → skylos-1.1.11.dist-info}/entry_points.txt +0 -0
- {skylos-1.0.11.dist-info → skylos-1.1.11.dist-info}/top_level.txt +0 -0
test/pykomodo/command_line.py
DELETED
@@ -1,176 +0,0 @@
import sys
import argparse
import os

KOMODO_VERSION = "0.2.5"

def launch_dashboard():
    """Launch the dashboard interface."""
    try:
        from pykomodo.dashboard import launch_dashboard
        print("Starting Komodo Dashboard...")
        demo = launch_dashboard()
        demo.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            debug=False
        )
    except ImportError as e:
        print(f"[Error] Dashboard dependencies not available: {e}", file=sys.stderr)
        print("Please install gradio: pip install gradio", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"[Error] Failed to launch dashboard: {e}", file=sys.stderr)
        sys.exit(1)

def main():
    """Main entry point for the komodo CLI."""
    parser = argparse.ArgumentParser(
        description="Process and chunk codebase content with advanced chunking strategies."
    )

    parser.add_argument("--version", action="version", version=f"komodo {KOMODO_VERSION}")

    parser.add_argument("--dashboard", action="store_true",
                        help="Launch the web-based dashboard interface")

    parser.add_argument("dirs", nargs="*", default=["."],
                        help="Directories to process (default: current directory)")

    chunk_group = parser.add_mutually_exclusive_group(required=False)
    chunk_group.add_argument("--equal-chunks", type=int,
                             help="Split into N equal chunks")
    chunk_group.add_argument("--max-chunk-size", type=int,
                             help="Maximum tokens/lines per chunk")
    chunk_group.add_argument("--max-tokens", type=int,
                             help="Maximum tokens per chunk (token-based chunking)")

    parser.add_argument("--output-dir", default="chunks",
                        help="Output directory for chunks (default: chunks)")

    parser.add_argument("--ignore", action="append", default=[],
                        help="Repeatable. Each usage adds one ignore pattern. Example: --ignore '**/node_modules/**' --ignore 'venv'")
    parser.add_argument("--unignore", action="append", default=[],
                        help="Repeatable. Each usage adds one unignore pattern. Example: --unignore '*.md'")

    parser.add_argument("--dry-run", action="store_true",
                        help="Show which files would be processed, but do not generate any chunks.")

    parser.add_argument("--priority", action="append", default=[],
                        help="Priority rules in format 'pattern,score' (repeatable). Example: --priority '*.py,10' --priority 'file2.txt,20'")

    parser.add_argument("--num-threads", type=int, default=4,
                        help="Number of processing threads (default: 4)")

    parser.add_argument("--enhanced", action="store_true",
                        help="Enable LLM optimizations")

    parser.add_argument("--semantic-chunks", action="store_true",
                        help="Use AST-based chunking for .py files (splits by top-level functions/classes)")

    parser.add_argument("--context-window", type=int, default=4096,
                        help="Target LLM context window size (default: 4096)")
    parser.add_argument("--min-relevance", type=float, default=0.3,
                        help="Minimum relevance score 0.0-1.0 (default: 0.3)")
    parser.add_argument("--no-metadata", action="store_true",
                        help="Disable metadata extraction")
    parser.add_argument("--keep-redundant", action="store_true",
                        help="Keep redundant content")
    parser.add_argument("--no-summaries", action="store_true",
                        help="Disable summary generation")

    parser.add_argument("--file-type", type=str,
                        help="Only chunk files of this type (e.g., 'pdf', 'py')")

    parser.add_argument("--verbose", action="store_true",
                        help="Enable verbose output")

    args = parser.parse_args()

    if args.dashboard:
        launch_dashboard()
        return

    if not any([args.equal_chunks, args.max_chunk_size, args.max_tokens]):
        parser.error("One of --equal-chunks, --max-chunk-size, or --max-tokens is required (unless using --dashboard)")

    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)

    priority_rules = []
    for rule in args.priority:
        if not rule:
            continue
        try:
            pattern, score = rule.split(",", 1)
            priority_rules.append((pattern.strip(), int(score.strip())))
        except ValueError:
            print(f"[Error] Priority rule must be 'pattern,score': {rule}",
                  file=sys.stderr)
            sys.exit(1)

    chunker = None
    try:
        if args.max_tokens:
            try:
                from pykomodo.token_chunker import TokenBasedChunker as ChunkerClass
                if args.verbose:
                    print("Using TokenBasedChunker for token-based chunking")
            except ImportError:
                print("[Error] TokenBasedChunker not available. Please install tiktoken or update pykomodo.",
                      file=sys.stderr)
                sys.exit(1)

            chunker_args = {
                "max_tokens_per_chunk": args.max_tokens,
                "output_dir": args.output_dir,
                "user_ignore": args.ignore,
                "user_unignore": args.unignore,
                "priority_rules": priority_rules,
                "num_threads": args.num_threads,
                "dry_run": args.dry_run,
                "semantic_chunking": args.semantic_chunks,
                "file_type": args.file_type,
                "verbose": args.verbose
            }
        else:
            if args.enhanced:
                from pykomodo.enhanced_chunker import EnhancedParallelChunker as ChunkerClass
            else:
                from pykomodo.multi_dirs_chunker import ParallelChunker as ChunkerClass

            chunker_args = {
                "equal_chunks": args.equal_chunks,
                "max_chunk_size": args.max_chunk_size,
                "output_dir": args.output_dir,
                "user_ignore": args.ignore,
                "user_unignore": args.unignore,
                "priority_rules": priority_rules,
                "num_threads": args.num_threads,
                "dry_run": args.dry_run,
                "semantic_chunking": args.semantic_chunks,
                "file_type": args.file_type
            }

            if args.enhanced:
                chunker_args.update({
                    "extract_metadata": not args.no_metadata,
                    "add_summaries": not args.no_summaries,
                    "remove_redundancy": not args.keep_redundant,
                    "context_window": args.context_window,
                    "min_relevance_score": args.min_relevance
                })

        chunker = ChunkerClass(**chunker_args)
        chunker.process_directories(args.dirs)

    except Exception as e:
        print(f"[Error] Processing failed: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        if chunker and hasattr(chunker, 'close'):
            chunker.close()

if __name__ == "__main__":
    main()
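For context, the deleted CLI above requires exactly one of --equal-chunks, --max-chunk-size, or --max-tokens unless --dashboard is given, and parses each --priority value as 'pattern,score'. A minimal sketch of driving it in-process, assuming pykomodo.command_line is importable as it was laid out before removal; the directory, token budget, and patterns below are made-up illustration values, not taken from the package:

# Hedged sketch: substitute sys.argv, then call the removed module's main().
import sys
from pykomodo.command_line import main  # assumed import path

sys.argv = [
    "komodo",                          # program name (argparse reads sys.argv[1:])
    "--max-tokens", "800",             # selects the TokenBasedChunker path
    "--priority", "*.py,10",           # parsed into the tuple ("*.py", 10)
    "--ignore", "**/node_modules/**",  # repeatable ignore pattern
    "--dry-run",                       # list files, generate no chunks
    "src/",                            # positional directory to process
]
main()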
test/pykomodo/config.py
DELETED
@@ -1,20 +0,0 @@
# src/config.py

from dataclasses import dataclass
from pathlib import Path
from typing import Optional

@dataclass
class PriorityRule:
    pattern: str
    score: int

@dataclass
class KomodoConfig:
    max_size: int = 10 * 1024 * 1024
    token_mode: bool = False
    output_dir: Optional[Path] = None
    stream: bool = False
    ignore_patterns: list[str] = None
    priority_rules: list[PriorityRule] = None
    binary_extensions: list[str] = None
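Note that the deleted KomodoConfig defaults its list-typed fields to None, so any consumer had to guard before appending. A hedged sketch of the usual alternative, using a hypothetical class name purely for illustration (not code from the package): default_factory gives each instance its own empty list.

from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

@dataclass
class KomodoConfigAlt:  # hypothetical name, for illustration only
    max_size: int = 10 * 1024 * 1024
    output_dir: Optional[Path] = None
    ignore_patterns: list[str] = field(default_factory=list)    # fresh [] per instance
    binary_extensions: list[str] = field(default_factory=list)

cfg = KomodoConfigAlt()
cfg.ignore_patterns.append("*.log")  # safe: no None check needed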
test/pykomodo/core.py
DELETED
@@ -1,121 +0,0 @@
import os
import fnmatch
from typing import List, Optional

class PriorityRule:
    """
    Simple Python container for (pattern, score).
    """
    def __init__(self, pattern, score):
        self.pattern: str = pattern
        self.score: int = score

class PyCConfig:
    """
    A pure Python equivalent of the 'PyCConfig' that in Cython
    wrapped the 'CConfig' struct. This class maintains the same
    conceptual fields but in Pythonic form (lists, strings, booleans).
    """

    def __init__(self):
        self.max_size: int = 0
        self.token_mode: bool = False
        self.output_dir: Optional[str] = None
        self.stream: bool = False

        self.ignore_patterns: List[str] = []
        self.unignore_patterns: List[str] = []
        self.priority_rules: List[PriorityRule] = []
        self.binary_exts: List[str] = []

    def add_ignore_pattern(self, pattern: str) -> None:
        """
        Just appends to a Python list.
        """
        self.ignore_patterns.append(pattern)

    def add_unignore_pattern(self, pattern: str) -> None:
        self.unignore_patterns.append(pattern)

    def add_priority_rule(self, pattern: str, score: int) -> None:
        self.priority_rules.append(PriorityRule(pattern, score))

    def should_ignore(self, path: str) -> bool:
        """
        Return True if path matches one of the ignore_patterns,
        unless it matches unignore_patterns first.
        """
        for pat in self.unignore_patterns:
            if fnmatch.fnmatch(path, pat):
                return False

        for pat in self.ignore_patterns:
            if fnmatch.fnmatch(path, pat):
                return True

        return False

    def calculate_priority(self, path: str) -> int:
        """
        Returns the highest score among any matching priority rule.
        """
        highest = 0
        for rule in self.priority_rules:
            if fnmatch.fnmatch(path, rule.pattern):
                if rule.score > highest:
                    highest = rule.score
        return highest

    def is_binary_file(self, path: str) -> bool:
        """
        1) If extension is in self.binary_exts -> True
        2) Else read up to 512 bytes, if it has a null byte -> True
        3) If can't open -> True
        """
        _, ext = os.path.splitext(path)
        ext = ext.lstrip(".").lower()
        if ext in (b.lower() for b in self.binary_exts):
            return True

        try:
            with open(path, "rb") as f:
                chunk = f.read(512)
        except OSError:
            return True

        if b"\0" in chunk:
            return True

        return False

    def read_file_contents(self, path: str) -> str:
        """
        Reads the entire file as text, returns it.
        If can't open, return "<NULL>" or handle differently.
        """
        try:
            with open(path, "rb") as f:
                data = f.read()
            return data.decode("utf-8", errors="replace")
        except OSError:
            return "<NULL>"

    def count_tokens(self, text: str) -> int:
        """
        Replicates py_count_tokens:
        Simple whitespace-based token counting in pure Python.
        """
        return len(text.split())

    def make_c_string(self, text: Optional[str]) -> str:
        if text is None:
            return "<NULL>"
        return text

    def __repr__(self) -> str:
        return (f"PyCConfig(max_size={self.max_size}, token_mode={self.token_mode}, "
                f"output_dir={self.output_dir!r}, stream={self.stream}, "
                f"ignore_patterns={self.ignore_patterns}, "
                f"unignore_patterns={self.unignore_patterns}, "
                f"priority_rules={[(r.pattern, r.score) for r in self.priority_rules]}, "
                f"binary_exts={self.binary_exts})")
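The deleted PyCConfig applies unignore patterns before ignore patterns and takes the highest matching priority score. A hedged usage sketch based only on the methods shown above; the import path is assumed from the pre-removal layout and the file paths are made up:

from pykomodo.core import PyCConfig  # assumed import path

cfg = PyCConfig()
cfg.add_ignore_pattern("*.log")          # ignore all logs...
cfg.add_unignore_pattern("keep/*.log")   # ...except those under keep/
cfg.add_priority_rule("*.py", 10)
cfg.add_priority_rule("tests/*.py", 20)

print(cfg.should_ignore("build/output.log"))      # True  (matches *.log)
print(cfg.should_ignore("keep/trace.log"))        # False (unignore checked first)
print(cfg.calculate_priority("tests/test_x.py"))  # 20    (highest matching rule wins)
print(cfg.count_tokens("three word string"))      # 3     (whitespace split)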