skylos 1.0.9__tar.gz → 1.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of skylos might be problematic.
- {skylos-1.0.9 → skylos-1.0.11}/PKG-INFO +1 -1
- {skylos-1.0.9 → skylos-1.0.11}/README.md +1 -1
- {skylos-1.0.9 → skylos-1.0.11}/pyproject.toml +1 -1
- {skylos-1.0.9 → skylos-1.0.11}/setup.py +1 -1
- {skylos-1.0.9 → skylos-1.0.11}/skylos/__init__.py +1 -1
- {skylos-1.0.9 → skylos-1.0.11}/skylos/analyzer.py +89 -17
- {skylos-1.0.9 → skylos-1.0.11}/skylos/cli.py +24 -1
- {skylos-1.0.9 → skylos-1.0.11}/skylos/visitor.py +76 -16
- {skylos-1.0.9 → skylos-1.0.11}/skylos.egg-info/PKG-INFO +1 -1
- {skylos-1.0.9 → skylos-1.0.11}/skylos.egg-info/SOURCES.txt +9 -0
- skylos-1.0.11/test/pykomodo/command_line.py +176 -0
- skylos-1.0.11/test/pykomodo/config.py +20 -0
- skylos-1.0.11/test/pykomodo/core.py +121 -0
- skylos-1.0.11/test/pykomodo/dashboard.py +608 -0
- skylos-1.0.11/test/pykomodo/enhanced_chunker.py +304 -0
- skylos-1.0.11/test/pykomodo/multi_dirs_chunker.py +783 -0
- skylos-1.0.11/test/pykomodo/pykomodo_config.py +68 -0
- skylos-1.0.11/test/pykomodo/token_chunker.py +470 -0
- skylos-1.0.11/test/sample_repo/sample_repo/__init__.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/setup.cfg +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/skylos.egg-info/dependency_links.txt +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/skylos.egg-info/entry_points.txt +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/skylos.egg-info/requires.txt +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/skylos.egg-info/top_level.txt +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/__init__.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/compare_tools.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/diagnostics.py +0 -0
- {skylos-1.0.9/test/sample_repo → skylos-1.0.11/test/pykomodo}/__init__.py +0 -0
- {skylos-1.0.9/test/sample_repo → skylos-1.0.11/test}/sample_repo/__init__.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/sample_repo/app.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/sample_repo/sample_repo/commands.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/sample_repo/sample_repo/models.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/sample_repo/sample_repo/routes.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/sample_repo/sample_repo/utils.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/test_skylos.py +0 -0
- {skylos-1.0.9 → skylos-1.0.11}/test/test_visitor.py +0 -0
{skylos-1.0.9 → skylos-1.0.11}/README.md

@@ -212,7 +212,7 @@ We welcome contributions! Please read our [Contributing Guidelines](CONTRIBUTING
 5. Open a Pull Request
 
 ## Roadmap
-
+- [ ] Add a production flag, to include dead codes that are used in test but not in the actual execution
 - [ ] Expand our test cases
 - [ ] Configuration file support
 - [ ] Custom analysis rules
{skylos-1.0.9 → skylos-1.0.11}/skylos/analyzer.py

@@ -26,7 +26,6 @@ class Skylos:
         return".".join(p)
 
     def _mark_exports(self):
-
         for name, d in self.defs.items():
             if d.in_init and not d.simple_name.startswith('_'):
                 d.is_exported = True
@@ -71,7 +70,6 @@ class Skylos:
             d.references += 1
 
     def _get_base_classes(self, class_name):
-        """Get base classes for a given class name"""
         if class_name not in self.defs:
             return []
 
@@ -83,7 +81,6 @@ class Skylos:
             return []
 
     def _apply_heuristics(self):
-
         class_methods=defaultdict(list)
         for d in self.defs.values():
             if d.type in("method","function") and"." in d.name:
@@ -97,17 +94,30 @@ class Skylos:
             if m.simple_name in AUTO_CALLED:m.references+=1
 
         for d in self.defs.values():
-            if d.simple_name in MAGIC_METHODS or d.simple_name.startswith("__")and d.simple_name.endswith("__"):
-
-
-            if d.
-
-
+            if d.simple_name in MAGIC_METHODS or (d.simple_name.startswith("__") and d.simple_name.endswith("__")):
+                d.confidence = 0
+
+            if d.type == "parameter" and d.simple_name in ("self", "cls"):
+                d.confidence = 0
+
+            if d.type != "parameter" and (d.simple_name in MAGIC_METHODS or (d.simple_name.startswith("__") and d.simple_name.endswith("__"))):
+                d.confidence = 0
+
+            if not d.simple_name.startswith("_") and d.type in ("function", "method", "class"):
+                d.confidence = min(d.confidence, 90)
+
+            if d.in_init and d.type in ("function", "class"):
+                d.confidence = min(d.confidence, 85)
+
+            if d.name.split(".")[0] in self.dynamic:
+                d.confidence = min(d.confidence, 60)
+
+            if d.type == "variable" and d.simple_name == "_":
+                d.confidence = 0
+
             if d.type == "method" and TEST_METHOD_PATTERN.match(d.simple_name):
-                # check if its in a class that inherits from a test base class
                 class_name = d.name.rsplit(".", 1)[0]
                 class_simple_name = class_name.split(".")[-1]
-                # class name suggests it's a test class, ignore test methods
                 if "Test" in class_simple_name or class_simple_name.endswith("TestCase"):
                     d.confidence = 0
 
@@ -133,10 +143,7 @@ class Skylos:
         self._mark_refs()
         self._apply_heuristics()
         self._mark_exports()
-
-        # for name, d in self.defs.items():
-        #     print(f"  {d.type} '{name}': {d.references} refs, exported: {d.is_exported}, confidence: {d.confidence}")
-
+
         thr = max(0, thr)
 
         unused = []
@@ -144,7 +151,14 @@ class Skylos:
             if d.references == 0 and not d.is_exported and d.confidence >= thr:
                 unused.append(d.to_dict())
 
-        result = {
+        result = {
+            "unused_functions": [],
+            "unused_imports": [],
+            "unused_classes": [],
+            "unused_variables": [],
+            "unused_parameters": []
+        }
+
         for u in unused:
             if u["type"] in ("function", "method"):
                 result["unused_functions"].append(u)
@@ -152,6 +166,10 @@ class Skylos:
                 result["unused_imports"].append(u)
             elif u["type"] == "class":
                 result["unused_classes"].append(u)
+            elif u["type"] == "variable":
+                result["unused_variables"].append(u)
+            elif u["type"] == "parameter":
+                result["unused_parameters"].append(u)
 
         return json.dumps(result, indent=2)
 
@@ -175,6 +193,60 @@ def analyze(path,conf=60):return Skylos().analyze(path,conf)
 if __name__=="__main__":
     if len(sys.argv)>1:
         p=sys.argv[1];c=int(sys.argv[2])if len(sys.argv)>2 else 60
-
+        result = analyze(p,c)
+
+        data = json.loads(result)
+        print("\n🔍 Python Static Analysis Results")
+        print("===================================\n")
+
+        total_items = sum(len(items) for items in data.values())
+
+        print("Summary:")
+        if data["unused_functions"]:
+            print(f"  • Unreachable functions: {len(data['unused_functions'])}")
+        if data["unused_imports"]:
+            print(f"  • Unused imports: {len(data['unused_imports'])}")
+        if data["unused_classes"]:
+            print(f"  • Unused classes: {len(data['unused_classes'])}")
+        if data["unused_variables"]:
+            print(f"  • Unused variables: {len(data['unused_variables'])}")
+
+        if data["unused_functions"]:
+            print("\n📦 Unreachable Functions")
+            print("=======================")
+            for i, func in enumerate(data["unused_functions"], 1):
+                print(f"  {i}. {func['name']}")
+                print(f"     └─ {func['file']}:{func['line']}")
+
+        if data["unused_imports"]:
+            print("\n📥 Unused Imports")
+            print("================")
+            for i, imp in enumerate(data["unused_imports"], 1):
+                print(f"  {i}. {imp['simple_name']}")
+                print(f"     └─ {imp['file']}:{imp['line']}")
+
+        if data["unused_classes"]:
+            print("\n📋 Unused Classes")
+            print("=================")
+            for i, cls in enumerate(data["unused_classes"], 1):
+                print(f"  {i}. {cls['name']}")
+                print(f"     └─ {cls['file']}:{cls['line']}")
+
+        if data["unused_variables"]:
+            print("\n📊 Unused Variables")
+            print("==================")
+            for i, var in enumerate(data["unused_variables"], 1):
+                print(f"  {i}. {var['name']}")
+                print(f"     └─ {var['file']}:{var['line']}")
+
+        print("\n" + "─" * 50)
+        print(f"Found {total_items} dead code items. Add this badge to your README:")
+        print(f"```markdown")
+        print(f"")
+        print(f"```")
+
+        print("\nNext steps:")
+        print("  • Use --interactive to select specific items to remove")
+        print("  • Use --dry-run to preview changes before applying them")
     else:
         print("Usage: python Skylos.py <path> [confidence_threshold]")
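The `__main__` block above consumes the same JSON string that `analyze()` returns, so the new categories are visible to library callers as well. Below is a minimal sketch of reading the 1.0.11 result schema, assuming the import path shown in this diff; the project path is a placeholder, not a path from the package.

```python
# Minimal sketch of consuming the new result schema in skylos 1.0.11.
# Assumes analyze() is importable from skylos.analyzer as in the hunks above;
# "path/to/project" is a hypothetical placeholder.
import json
from skylos.analyzer import analyze

report = json.loads(analyze("path/to/project", 60))

# 1.0.11 adds "unused_variables" and "unused_parameters" next to the three
# existing buckets, so callers written against 1.0.9 should use .get().
for category in ("unused_functions", "unused_imports", "unused_classes",
                 "unused_variables", "unused_parameters"):
    for item in report.get(category, []):
        print(f"{category}: {item['name']} ({item['file']}:{item['line']})")
```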
{skylos-1.0.9 → skylos-1.0.11}/skylos/cli.py

@@ -247,6 +247,8 @@ def main() -> None:
 
     unused_functions = result.get("unused_functions", [])
     unused_imports = result.get("unused_imports", [])
+    unused_parameters = result.get("unused_parameters", [])
+    unused_variables = result.get("unused_variables", [])
 
     logger.info(f"{Colors.CYAN}{Colors.BOLD}🔍 Python Static Analysis Results{Colors.RESET}")
     logger.info(f"{Colors.CYAN}{'=' * 35}{Colors.RESET}")
@@ -254,7 +256,10 @@ def main() -> None:
     logger.info(f"\n{Colors.BOLD}Summary:{Colors.RESET}")
     logger.info(f"  • Unreachable functions: {Colors.YELLOW}{len(unused_functions)}{Colors.RESET}")
     logger.info(f"  • Unused imports: {Colors.YELLOW}{len(unused_imports)}{Colors.RESET}")
-
+    logger.info(f"  • Unused parameters: {Colors.YELLOW}{len(unused_parameters)}{Colors.RESET}")
+    logger.info(f"  • Unused variables: {Colors.YELLOW}{len(unused_variables)}{Colors.RESET}")
+
+
     if args.interactive and (unused_functions or unused_imports):
         logger.info(f"\n{Colors.BOLD}Interactive Mode:{Colors.RESET}")
         selected_functions, selected_imports = interactive_selection(logger, unused_functions, unused_imports)
@@ -324,6 +329,24 @@ def main() -> None:
     else:
         logger.info(f"\n{Colors.GREEN}✓ All imports are being used!{Colors.RESET}")
 
+    if unused_parameters:
+        logger.info(f"\n{Colors.BLUE}{Colors.BOLD}🔧 Unused Parameters{Colors.RESET}")
+        logger.info(f"{Colors.BLUE}{'=' * 18}{Colors.RESET}")
+        for i, item in enumerate(unused_parameters, 1):
+            logger.info(f"{Colors.GRAY}{i:2d}. {Colors.RESET}{Colors.BLUE}{item['name']}{Colors.RESET}")
+            logger.info(f"    {Colors.GRAY}└─ {item['file']}:{item['line']}{Colors.RESET}")
+    else:
+        logger.info(f"\n{Colors.GREEN}✓ All parameters are being used!{Colors.RESET}")
+
+    if unused_variables:
+        logger.info(f"\n{Colors.YELLOW}{Colors.BOLD}📊 Unused Variables{Colors.RESET}")
+        logger.info(f"{Colors.YELLOW}{'=' * 18}{Colors.RESET}")
+        for i, item in enumerate(unused_variables, 1):
+            logger.info(f"{Colors.GRAY}{i:2d}. {Colors.RESET}{Colors.YELLOW}{item['name']}{Colors.RESET}")
+            logger.info(f"    {Colors.GRAY}└─ {item['file']}:{item['line']}{Colors.RESET}")
+    else:
+        logger.info(f"\n{Colors.GREEN}✓ All variables are being used!{Colors.RESET}")
+
     dead_code_count = len(unused_functions) + len(unused_imports)
     print_badge(dead_code_count, logger)
 
{skylos-1.0.9 → skylos-1.0.11}/skylos/visitor.py

@@ -52,6 +52,7 @@ class Visitor(ast.NodeVisitor):
         self.dyn=set()
         self.exports=set()
         self.current_function_scope = []
+        self.current_function_params = []
 
     def add_def(self,n,t,l):
         if n not in{d.name for d in self.defs}:self.defs.append(Definition(n,t,self.file,l))
@@ -85,17 +86,27 @@ class Visitor(ast.NodeVisitor):
             self.alias[a.asname or a.name.split(".")[-1]]=full
             self.add_def(full,"import",node.lineno)
 
-    def visit_ImportFrom(self,node):
-        if node.module is None:
+    def visit_ImportFrom(self, node):
+        if node.module is None:
+            return
         for a in node.names:
-            if a.name=="*":
-
+            if a.name == "*":
+                continue
+            base = node.module
             if node.level:
-                parts=self.mod.split(".")
-                base=".".join(parts[:-node.level])+(f".{node.module}"if node.module else"")
-
-
-
+                parts = self.mod.split(".")
+                base = ".".join(parts[:-node.level]) + (f".{node.module}" if node.module else "")
+
+            full = f"{base}.{a.name}"
+
+            if a.asname:
+                alias_full = f"{self.mod}.{a.asname}" if self.mod else a.asname
+                self.add_def(alias_full, "import", node.lineno)
+                self.alias[a.asname] = full
+                self.add_ref(full)
+            else:
+                self.alias[a.name] = full
+                self.add_def(full, "import", node.lineno)
 
     def visit_arguments(self, args):
         for arg in args.args:
@@ -128,15 +139,25 @@ class Visitor(ast.NodeVisitor):
 
         self.current_function_scope.append(node.name)
 
+        old_params = self.current_function_params
+        self.current_function_params = []
+
         for d_node in node.decorator_list:
             self.visit(d_node)
 
+        for arg in node.args.args:
+            param_name = f"{qualified_name}.{arg.arg}"
+            self.add_def(param_name, "parameter", node.lineno)
+            self.current_function_params.append((arg.arg, param_name))
+
         self.visit_arguments(node.args)
         self.visit_annotation(node.returns)
 
         for stmt in node.body:
             self.visit(stmt)
+
         self.current_function_scope.pop()
+        self.current_function_params = old_params
 
     visit_AsyncFunctionDef=visit_FunctionDef
 
@@ -178,6 +199,30 @@ class Visitor(ast.NodeVisitor):
             self.visit(node.step)
 
     def visit_Assign(self, node):
+        def process_target_for_def(target_node):
+            if isinstance(target_node, ast.Name):
+                var_name_simple = target_node.id
+                if var_name_simple == "__all__" and not self.current_function_scope and not self.cls:
+                    return
+
+                scope_parts = [self.mod]
+                if self.cls:
+                    scope_parts.append(self.cls)
+                if self.current_function_scope:
+                    scope_parts.extend(self.current_function_scope)
+
+                prefix = '.'.join(filter(None, scope_parts))
+                qualified_var_name = f"{prefix}.{var_name_simple}" if prefix else var_name_simple
+
+                self.add_def(qualified_var_name, "variable", target_node.lineno)
+
+            elif isinstance(target_node, (ast.Tuple, ast.List)):
+                for elt in target_node.elts:
+                    process_target_for_def(elt)
+
+        for t in node.targets:
+            process_target_for_def(t)
+
         for target in node.targets:
             if isinstance(target, ast.Name) and target.id == "__all__":
                 if isinstance(node.value, (ast.List, ast.Tuple)):
@@ -189,9 +234,10 @@ class Visitor(ast.NodeVisitor):
                         value = elt.s
 
                     if value is not None:
-
-                        self.add_ref(
-                        self.add_ref(value)
+                        full_name_export = f"{self.mod}.{value}" if self.mod else value
+                        self.add_ref(full_name_export)
+                        self.add_ref(value)
+
         self.generic_visit(node)
 
     def visit_Call(self, node):
@@ -219,12 +265,26 @@ class Visitor(ast.NodeVisitor):
 
     def visit_Name(self,node):
         if isinstance(node.ctx,ast.Load):
-            self.
-
+            for param_name, param_full_name in self.current_function_params:
+                if node.id == param_name:
+                    self.add_ref(param_full_name)
+                    break
+            else:
+                # not parameter, handle normally
+                self.add_ref(self.qual(node.id))
+            if node.id in DYNAMIC_PATTERNS:
+                self.dyn.add(self.mod.split(".")[0])
 
-    def visit_Attribute(self,node):
+    def visit_Attribute(self, node):
         self.generic_visit(node)
-        if isinstance(node.ctx,ast.Load)and isinstance(node.value,ast.Name):
+        if isinstance(node.ctx, ast.Load) and isinstance(node.value, ast.Name):
+            if node.value.id in [param_name for param_name, _ in self.current_function_params]:
+                # mark parameter as referenced
+                for param_name, param_full_name in self.current_function_params:
+                    if node.value.id == param_name:
+                        self.add_ref(param_full_name)
+                        break
+
             self.add_ref(f"{self.qual(node.value.id)}.{node.attr}")
 
     def visit_keyword(self, node):
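Taken together, the visitor changes above record function parameters and assigned names as definitions, and the analyzer heuristics zero out `self`/`cls` parameters and `_` variables. The toy module below is illustrative only (it is not part of skylos or this diff); the comments describe how the 1.0.11 changes treat each name.

```python
# toy_module.py — illustrative input only, not part of skylos or this diff.

class Greeter:
    def greet(self, name, punctuation):
        # "punctuation" is recorded as a parameter definition and never read,
        # so it would land in "unused_parameters"; "self" is zeroed out by the
        # new self/cls heuristic and is not reported.
        unused_note = "drafted but never read"   # recorded as a "variable" -> unused_variables
        return "hi " + name                      # loading "name" marks that parameter as referenced

_ = Greeter().greet("sam", "!")                  # "_" gets confidence 0, so it is not reported
```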
{skylos-1.0.9 → skylos-1.0.11}/skylos.egg-info/SOURCES.txt

@@ -16,6 +16,15 @@ test/compare_tools.py
 test/diagnostics.py
 test/test_skylos.py
 test/test_visitor.py
+test/pykomodo/__init__.py
+test/pykomodo/command_line.py
+test/pykomodo/config.py
+test/pykomodo/core.py
+test/pykomodo/dashboard.py
+test/pykomodo/enhanced_chunker.py
+test/pykomodo/multi_dirs_chunker.py
+test/pykomodo/pykomodo_config.py
+test/pykomodo/token_chunker.py
 test/sample_repo/__init__.py
 test/sample_repo/app.py
 test/sample_repo/sample_repo/__init__.py
skylos-1.0.11/test/pykomodo/command_line.py (new file)

@@ -0,0 +1,176 @@
+import sys
+import argparse
+import os
+
+KOMODO_VERSION = "0.2.5"
+
+def launch_dashboard():
+    """Launch the dashboard interface."""
+    try:
+        from pykomodo.dashboard import launch_dashboard
+        print("Starting Komodo Dashboard...")
+        demo = launch_dashboard()
+        demo.launch(
+            server_name="0.0.0.0",
+            server_port=7860,
+            share=False,
+            debug=False
+        )
+    except ImportError as e:
+        print(f"[Error] Dashboard dependencies not available: {e}", file=sys.stderr)
+        print("Please install gradio: pip install gradio", file=sys.stderr)
+        sys.exit(1)
+    except Exception as e:
+        print(f"[Error] Failed to launch dashboard: {e}", file=sys.stderr)
+        sys.exit(1)
+
+def main():
+    """Main entry point for the komodo CLI."""
+    parser = argparse.ArgumentParser(
+        description="Process and chunk codebase content with advanced chunking strategies."
+    )
+
+    parser.add_argument("--version", action="version", version=f"komodo {KOMODO_VERSION}")
+
+    parser.add_argument("--dashboard", action="store_true",
+                        help="Launch the web-based dashboard interface")
+
+    parser.add_argument("dirs", nargs="*", default=["."],
+                        help="Directories to process (default: current directory)")
+
+    chunk_group = parser.add_mutually_exclusive_group(required=False)
+    chunk_group.add_argument("--equal-chunks", type=int,
+                        help="Split into N equal chunks")
+    chunk_group.add_argument("--max-chunk-size", type=int,
+                        help="Maximum tokens/lines per chunk")
+    chunk_group.add_argument("--max-tokens", type=int,
+                        help="Maximum tokens per chunk (token-based chunking)")
+
+    parser.add_argument("--output-dir", default="chunks",
+                        help="Output directory for chunks (default: chunks)")
+
+    parser.add_argument("--ignore", action="append", default=[],
+                        help="Repeatable. Each usage adds one ignore pattern. Example: --ignore '**/node_modules/**' --ignore 'venv'")
+    parser.add_argument("--unignore", action="append", default=[],
+                        help="Repeatable. Each usage adds one unignore pattern. Example: --unignore '*.md'")
+
+    parser.add_argument("--dry-run", action="store_true",
+                        help="Show which files would be processed, but do not generate any chunks.")
+
+    parser.add_argument("--priority", action="append", default=[],
+                        help="Priority rules in format 'pattern,score' (repeatable). Example: --priority '*.py,10' --priority 'file2.txt,20'")
+
+    parser.add_argument("--num-threads", type=int, default=4,
+                        help="Number of processing threads (default: 4)")
+
+    parser.add_argument("--enhanced", action="store_true",
+                        help="Enable LLM optimizations")
+
+    parser.add_argument("--semantic-chunks", action="store_true",
+                        help="Use AST-based chunking for .py files (splits by top-level functions/classes)")
+
+    parser.add_argument("--context-window", type=int, default=4096,
+                        help="Target LLM context window size (default: 4096)")
+    parser.add_argument("--min-relevance", type=float, default=0.3,
+                        help="Minimum relevance score 0.0-1.0 (default: 0.3)")
+    parser.add_argument("--no-metadata", action="store_true",
+                        help="Disable metadata extraction")
+    parser.add_argument("--keep-redundant", action="store_true",
+                        help="Keep redundant content")
+    parser.add_argument("--no-summaries", action="store_true",
+                        help="Disable summary generation")
+
+    parser.add_argument("--file-type", type=str,
+                        help="Only chunk files of this type (e.g., 'pdf', 'py')")
+
+    parser.add_argument("--verbose", action="store_true",
+                        help="Enable verbose output")
+
+    args = parser.parse_args()
+
+    if args.dashboard:
+        launch_dashboard()
+        return
+
+    if not any([args.equal_chunks, args.max_chunk_size, args.max_tokens]):
+        parser.error("One of --equal-chunks, --max-chunk-size, or --max-tokens is required (unless using --dashboard)")
+
+    if args.output_dir:
+        os.makedirs(args.output_dir, exist_ok=True)
+
+    priority_rules = []
+    for rule in args.priority:
+        if not rule:
+            continue
+        try:
+            pattern, score = rule.split(",", 1)
+            priority_rules.append((pattern.strip(), int(score.strip())))
+        except ValueError:
+            print(f"[Error] Priority rule must be 'pattern,score': {rule}",
+                  file=sys.stderr)
+            sys.exit(1)
+
+    chunker = None
+    try:
+        if args.max_tokens:
+            try:
+                from pykomodo.token_chunker import TokenBasedChunker as ChunkerClass
+                if args.verbose:
+                    print("Using TokenBasedChunker for token-based chunking")
+            except ImportError:
+                print("[Error] TokenBasedChunker not available. Please install tiktoken or update pykomodo.",
+                      file=sys.stderr)
+                sys.exit(1)
+
+            chunker_args = {
+                "max_tokens_per_chunk": args.max_tokens,
+                "output_dir": args.output_dir,
+                "user_ignore": args.ignore,
+                "user_unignore": args.unignore,
+                "priority_rules": priority_rules,
+                "num_threads": args.num_threads,
+                "dry_run": args.dry_run,
+                "semantic_chunking": args.semantic_chunks,
+                "file_type": args.file_type,
+                "verbose": args.verbose
+            }
+        else:
+            if args.enhanced:
+                from pykomodo.enhanced_chunker import EnhancedParallelChunker as ChunkerClass
+            else:
+                from pykomodo.multi_dirs_chunker import ParallelChunker as ChunkerClass
+
+            chunker_args = {
+                "equal_chunks": args.equal_chunks,
+                "max_chunk_size": args.max_chunk_size,
+                "output_dir": args.output_dir,
+                "user_ignore": args.ignore,
+                "user_unignore": args.unignore,
+                "priority_rules": priority_rules,
+                "num_threads": args.num_threads,
+                "dry_run": args.dry_run,
+                "semantic_chunking": args.semantic_chunks,
+                "file_type": args.file_type
+            }
+
+            if args.enhanced:
+                chunker_args.update({
+                    "extract_metadata": not args.no_metadata,
+                    "add_summaries": not args.no_summaries,
+                    "remove_redundancy": not args.keep_redundant,
+                    "context_window": args.context_window,
+                    "min_relevance_score": args.min_relevance
+                })
+
+        chunker = ChunkerClass(**chunker_args)
+        chunker.process_directories(args.dirs)
+
+    except Exception as e:
+        print(f"[Error] Processing failed: {e}", file=sys.stderr)
+        sys.exit(1)
+    finally:
+        if chunker and hasattr(chunker, 'close'):
+            chunker.close()
+
+if __name__ == "__main__":
+    main()
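The default path of this fixture (no `--enhanced`, no `--max-tokens`) reduces to constructing `ParallelChunker` with the `chunker_args` dictionary built above and calling `process_directories()`. The sketch below shows a rough programmatic equivalent, assuming pykomodo is installed; the keyword names mirror the dictionary in the fixture, and the concrete values are illustrative, borrowed from the option help strings where possible.

```python
# Rough programmatic equivalent of the fixture's default (non-enhanced,
# non-token) path. Keyword names come from the chunker_args dict above;
# the values here are illustrative only.
from pykomodo.multi_dirs_chunker import ParallelChunker

chunker = ParallelChunker(
    equal_chunks=None,
    max_chunk_size=200,              # e.g. what a user would pass via --max-chunk-size
    output_dir="chunks",
    user_ignore=["**/node_modules/**"],
    user_unignore=["*.md"],
    priority_rules=[("*.py", 10)],   # parsed from --priority '*.py,10'
    num_threads=4,
    dry_run=True,                    # preview only, like --dry-run
    semantic_chunking=False,
    file_type=None,
)
chunker.process_directories(["."])
```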
skylos-1.0.11/test/pykomodo/config.py (new file)

@@ -0,0 +1,20 @@
+# src/config.py
+
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Optional
+
+@dataclass
+class PriorityRule:
+    pattern: str
+    score: int
+
+@dataclass
+class KomodoConfig:
+    max_size: int = 10 * 1024 * 1024
+    token_mode: bool = False
+    output_dir: Optional[Path] = None
+    stream: bool = False
+    ignore_patterns: list[str] = None
+    priority_rules: list[PriorityRule] = None
+    binary_extensions: list[str] = None