skylos 1.0.9__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skylos might be problematic. More details are available on the original diff page.

skylos/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  from skylos.analyzer import analyze
2
2
 
3
- __version__ = "1.0.9"
3
+ __version__ = "1.0.11"
4
4
 
5
5
  def debug_test():
6
6
  return "debug-ok"
skylos/analyzer.py CHANGED
@@ -26,7 +26,6 @@ class Skylos:
26
26
  return".".join(p)
27
27
 
28
28
  def _mark_exports(self):
29
-
30
29
  for name, d in self.defs.items():
31
30
  if d.in_init and not d.simple_name.startswith('_'):
32
31
  d.is_exported = True
@@ -71,7 +70,6 @@ class Skylos:
71
70
  d.references += 1
72
71
 
73
72
  def _get_base_classes(self, class_name):
74
- """Get base classes for a given class name"""
75
73
  if class_name not in self.defs:
76
74
  return []
77
75
 
@@ -83,7 +81,6 @@ class Skylos:
83
81
  return []
84
82
 
85
83
  def _apply_heuristics(self):
86
-
87
84
  class_methods=defaultdict(list)
88
85
  for d in self.defs.values():
89
86
  if d.type in("method","function") and"." in d.name:
@@ -97,17 +94,30 @@ class Skylos:
97
94
  if m.simple_name in AUTO_CALLED:m.references+=1
98
95
 
99
96
  for d in self.defs.values():
100
- if d.simple_name in MAGIC_METHODS or d.simple_name.startswith("__")and d.simple_name.endswith("__"):d.confidence=0
101
- if not d.simple_name.startswith("_")and d.type in("function","method","class"):d.confidence=min(d.confidence,90)
102
- if d.in_init and d.type in("function","class"):d.confidence=min(d.confidence,85)
103
- if d.name.split(".")[0] in self.dynamic:d.confidence=min(d.confidence,50)
104
-
105
- for d in self.defs.values():
97
+ if d.simple_name in MAGIC_METHODS or (d.simple_name.startswith("__") and d.simple_name.endswith("__")):
98
+ d.confidence = 0
99
+
100
+ if d.type == "parameter" and d.simple_name in ("self", "cls"):
101
+ d.confidence = 0
102
+
103
+ if d.type != "parameter" and (d.simple_name in MAGIC_METHODS or (d.simple_name.startswith("__") and d.simple_name.endswith("__"))):
104
+ d.confidence = 0
105
+
106
+ if not d.simple_name.startswith("_") and d.type in ("function", "method", "class"):
107
+ d.confidence = min(d.confidence, 90)
108
+
109
+ if d.in_init and d.type in ("function", "class"):
110
+ d.confidence = min(d.confidence, 85)
111
+
112
+ if d.name.split(".")[0] in self.dynamic:
113
+ d.confidence = min(d.confidence, 60)
114
+
115
+ if d.type == "variable" and d.simple_name == "_":
116
+ d.confidence = 0
117
+
106
118
  if d.type == "method" and TEST_METHOD_PATTERN.match(d.simple_name):
107
- # check if its in a class that inherits from a test base class
108
119
  class_name = d.name.rsplit(".", 1)[0]
109
120
  class_simple_name = class_name.split(".")[-1]
110
- # class name suggests it's a test class, ignore test methods
111
121
  if "Test" in class_simple_name or class_simple_name.endswith("TestCase"):
112
122
  d.confidence = 0
113
123
 
@@ -133,10 +143,7 @@ class Skylos:
133
143
  self._mark_refs()
134
144
  self._apply_heuristics()
135
145
  self._mark_exports()
136
-
137
- # for name, d in self.defs.items():
138
- # print(f" {d.type} '{name}': {d.references} refs, exported: {d.is_exported}, confidence: {d.confidence}")
139
-
146
+
140
147
  thr = max(0, thr)
141
148
 
142
149
  unused = []
@@ -144,7 +151,14 @@ class Skylos:
144
151
  if d.references == 0 and not d.is_exported and d.confidence >= thr:
145
152
  unused.append(d.to_dict())
146
153
 
147
- result = {"unused_functions": [], "unused_imports": [], "unused_classes": []}
154
+ result = {
155
+ "unused_functions": [],
156
+ "unused_imports": [],
157
+ "unused_classes": [],
158
+ "unused_variables": [],
159
+ "unused_parameters": []
160
+ }
161
+
148
162
  for u in unused:
149
163
  if u["type"] in ("function", "method"):
150
164
  result["unused_functions"].append(u)
@@ -152,6 +166,10 @@ class Skylos:
152
166
  result["unused_imports"].append(u)
153
167
  elif u["type"] == "class":
154
168
  result["unused_classes"].append(u)
169
+ elif u["type"] == "variable":
170
+ result["unused_variables"].append(u)
171
+ elif u["type"] == "parameter":
172
+ result["unused_parameters"].append(u)
155
173
 
156
174
  return json.dumps(result, indent=2)
157
175
 
@@ -175,6 +193,60 @@ def analyze(path,conf=60):return Skylos().analyze(path,conf)
175
193
  if __name__=="__main__":
176
194
  if len(sys.argv)>1:
177
195
  p=sys.argv[1];c=int(sys.argv[2])if len(sys.argv)>2 else 60
178
- print(analyze(p,c))
196
+ result = analyze(p,c)
197
+
198
+ data = json.loads(result)
199
+ print("\n🔍 Python Static Analysis Results")
200
+ print("===================================\n")
201
+
202
+ total_items = sum(len(items) for items in data.values())
203
+
204
+ print("Summary:")
205
+ if data["unused_functions"]:
206
+ print(f" • Unreachable functions: {len(data['unused_functions'])}")
207
+ if data["unused_imports"]:
208
+ print(f" • Unused imports: {len(data['unused_imports'])}")
209
+ if data["unused_classes"]:
210
+ print(f" • Unused classes: {len(data['unused_classes'])}")
211
+ if data["unused_variables"]:
212
+ print(f" • Unused variables: {len(data['unused_variables'])}")
213
+
214
+ if data["unused_functions"]:
215
+ print("\n📦 Unreachable Functions")
216
+ print("=======================")
217
+ for i, func in enumerate(data["unused_functions"], 1):
218
+ print(f" {i}. {func['name']}")
219
+ print(f" └─ {func['file']}:{func['line']}")
220
+
221
+ if data["unused_imports"]:
222
+ print("\n📥 Unused Imports")
223
+ print("================")
224
+ for i, imp in enumerate(data["unused_imports"], 1):
225
+ print(f" {i}. {imp['simple_name']}")
226
+ print(f" └─ {imp['file']}:{imp['line']}")
227
+
228
+ if data["unused_classes"]:
229
+ print("\n📋 Unused Classes")
230
+ print("=================")
231
+ for i, cls in enumerate(data["unused_classes"], 1):
232
+ print(f" {i}. {cls['name']}")
233
+ print(f" └─ {cls['file']}:{cls['line']}")
234
+
235
+ if data["unused_variables"]:
236
+ print("\n📊 Unused Variables")
237
+ print("==================")
238
+ for i, var in enumerate(data["unused_variables"], 1):
239
+ print(f" {i}. {var['name']}")
240
+ print(f" └─ {var['file']}:{var['line']}")
241
+
242
+ print("\n" + "─" * 50)
243
+ print(f"Found {total_items} dead code items. Add this badge to your README:")
244
+ print(f"```markdown")
245
+ print(f"![Dead Code: {total_items}](https://img.shields.io/badge/Dead_Code-{total_items}_detected-orange?logo=codacy&logoColor=red)")
246
+ print(f"```")
247
+
248
+ print("\nNext steps:")
249
+ print(" • Use --interactive to select specific items to remove")
250
+ print(" • Use --dry-run to preview changes before applying them")
179
251
  else:
180
252
  print("Usage: python Skylos.py <path> [confidence_threshold]")
skylos/cli.py CHANGED
@@ -247,6 +247,8 @@ def main() -> None:
247
247
 
248
248
  unused_functions = result.get("unused_functions", [])
249
249
  unused_imports = result.get("unused_imports", [])
250
+ unused_parameters = result.get("unused_parameters", [])
251
+ unused_variables = result.get("unused_variables", [])
250
252
 
251
253
  logger.info(f"{Colors.CYAN}{Colors.BOLD}🔍 Python Static Analysis Results{Colors.RESET}")
252
254
  logger.info(f"{Colors.CYAN}{'=' * 35}{Colors.RESET}")
@@ -254,7 +256,10 @@ def main() -> None:
254
256
  logger.info(f"\n{Colors.BOLD}Summary:{Colors.RESET}")
255
257
  logger.info(f" • Unreachable functions: {Colors.YELLOW}{len(unused_functions)}{Colors.RESET}")
256
258
  logger.info(f" • Unused imports: {Colors.YELLOW}{len(unused_imports)}{Colors.RESET}")
257
-
259
+ logger.info(f" • Unused parameters: {Colors.YELLOW}{len(unused_parameters)}{Colors.RESET}")
260
+ logger.info(f" • Unused variables: {Colors.YELLOW}{len(unused_variables)}{Colors.RESET}")
261
+
262
+
258
263
  if args.interactive and (unused_functions or unused_imports):
259
264
  logger.info(f"\n{Colors.BOLD}Interactive Mode:{Colors.RESET}")
260
265
  selected_functions, selected_imports = interactive_selection(logger, unused_functions, unused_imports)
@@ -324,6 +329,24 @@ def main() -> None:
324
329
  else:
325
330
  logger.info(f"\n{Colors.GREEN}✓ All imports are being used!{Colors.RESET}")
326
331
 
332
+ if unused_parameters:
333
+ logger.info(f"\n{Colors.BLUE}{Colors.BOLD}🔧 Unused Parameters{Colors.RESET}")
334
+ logger.info(f"{Colors.BLUE}{'=' * 18}{Colors.RESET}")
335
+ for i, item in enumerate(unused_parameters, 1):
336
+ logger.info(f"{Colors.GRAY}{i:2d}. {Colors.RESET}{Colors.BLUE}{item['name']}{Colors.RESET}")
337
+ logger.info(f" {Colors.GRAY}└─ {item['file']}:{item['line']}{Colors.RESET}")
338
+ else:
339
+ logger.info(f"\n{Colors.GREEN}✓ All parameters are being used!{Colors.RESET}")
340
+
341
+ if unused_variables:
342
+ logger.info(f"\n{Colors.YELLOW}{Colors.BOLD}📊 Unused Variables{Colors.RESET}")
343
+ logger.info(f"{Colors.YELLOW}{'=' * 18}{Colors.RESET}")
344
+ for i, item in enumerate(unused_variables, 1):
345
+ logger.info(f"{Colors.GRAY}{i:2d}. {Colors.RESET}{Colors.YELLOW}{item['name']}{Colors.RESET}")
346
+ logger.info(f" {Colors.GRAY}└─ {item['file']}:{item['line']}{Colors.RESET}")
347
+ else:
348
+ logger.info(f"\n{Colors.GREEN}✓ All variables are being used!{Colors.RESET}")
349
+
327
350
  dead_code_count = len(unused_functions) + len(unused_imports)
328
351
  print_badge(dead_code_count, logger)
329
352
 
skylos/visitor.py CHANGED
@@ -52,6 +52,7 @@ class Visitor(ast.NodeVisitor):
52
52
  self.dyn=set()
53
53
  self.exports=set()
54
54
  self.current_function_scope = []
55
+ self.current_function_params = []
55
56
 
56
57
  def add_def(self,n,t,l):
57
58
  if n not in{d.name for d in self.defs}:self.defs.append(Definition(n,t,self.file,l))
@@ -85,17 +86,27 @@ class Visitor(ast.NodeVisitor):
85
86
  self.alias[a.asname or a.name.split(".")[-1]]=full
86
87
  self.add_def(full,"import",node.lineno)
87
88
 
88
- def visit_ImportFrom(self,node):
89
- if node.module is None:return
89
+ def visit_ImportFrom(self, node):
90
+ if node.module is None:
91
+ return
90
92
  for a in node.names:
91
- if a.name=="*":continue
92
- base=node.module
93
+ if a.name == "*":
94
+ continue
95
+ base = node.module
93
96
  if node.level:
94
- parts=self.mod.split(".")
95
- base=".".join(parts[:-node.level])+(f".{node.module}"if node.module else"")
96
- full=f"{base}.{a.name}"
97
- self.alias[a.asname or a.name]=full
98
- self.add_def(full,"import",node.lineno)
97
+ parts = self.mod.split(".")
98
+ base = ".".join(parts[:-node.level]) + (f".{node.module}" if node.module else "")
99
+
100
+ full = f"{base}.{a.name}"
101
+
102
+ if a.asname:
103
+ alias_full = f"{self.mod}.{a.asname}" if self.mod else a.asname
104
+ self.add_def(alias_full, "import", node.lineno)
105
+ self.alias[a.asname] = full
106
+ self.add_ref(full)
107
+ else:
108
+ self.alias[a.name] = full
109
+ self.add_def(full, "import", node.lineno)
99
110
 
100
111
  def visit_arguments(self, args):
101
112
  for arg in args.args:
@@ -128,15 +139,25 @@ class Visitor(ast.NodeVisitor):
128
139
 
129
140
  self.current_function_scope.append(node.name)
130
141
 
142
+ old_params = self.current_function_params
143
+ self.current_function_params = []
144
+
131
145
  for d_node in node.decorator_list:
132
146
  self.visit(d_node)
133
147
 
148
+ for arg in node.args.args:
149
+ param_name = f"{qualified_name}.{arg.arg}"
150
+ self.add_def(param_name, "parameter", node.lineno)
151
+ self.current_function_params.append((arg.arg, param_name))
152
+
134
153
  self.visit_arguments(node.args)
135
154
  self.visit_annotation(node.returns)
136
155
 
137
156
  for stmt in node.body:
138
157
  self.visit(stmt)
158
+
139
159
  self.current_function_scope.pop()
160
+ self.current_function_params = old_params
140
161
 
141
162
  visit_AsyncFunctionDef=visit_FunctionDef
142
163
 
@@ -178,6 +199,30 @@ class Visitor(ast.NodeVisitor):
178
199
  self.visit(node.step)
179
200
 
180
201
  def visit_Assign(self, node):
202
+ def process_target_for_def(target_node):
203
+ if isinstance(target_node, ast.Name):
204
+ var_name_simple = target_node.id
205
+ if var_name_simple == "__all__" and not self.current_function_scope and not self.cls:
206
+ return
207
+
208
+ scope_parts = [self.mod]
209
+ if self.cls:
210
+ scope_parts.append(self.cls)
211
+ if self.current_function_scope:
212
+ scope_parts.extend(self.current_function_scope)
213
+
214
+ prefix = '.'.join(filter(None, scope_parts))
215
+ qualified_var_name = f"{prefix}.{var_name_simple}" if prefix else var_name_simple
216
+
217
+ self.add_def(qualified_var_name, "variable", target_node.lineno)
218
+
219
+ elif isinstance(target_node, (ast.Tuple, ast.List)):
220
+ for elt in target_node.elts:
221
+ process_target_for_def(elt)
222
+
223
+ for t in node.targets:
224
+ process_target_for_def(t)
225
+
181
226
  for target in node.targets:
182
227
  if isinstance(target, ast.Name) and target.id == "__all__":
183
228
  if isinstance(node.value, (ast.List, ast.Tuple)):
@@ -189,9 +234,10 @@ class Visitor(ast.NodeVisitor):
189
234
  value = elt.s
190
235
 
191
236
  if value is not None:
192
- full_name = f"{self.mod}.{value}"
193
- self.add_ref(full_name)
194
- self.add_ref(value)
237
+ full_name_export = f"{self.mod}.{value}" if self.mod else value
238
+ self.add_ref(full_name_export)
239
+ self.add_ref(value)
240
+
195
241
  self.generic_visit(node)
196
242
 
197
243
  def visit_Call(self, node):
@@ -219,12 +265,26 @@ class Visitor(ast.NodeVisitor):
219
265
 
220
266
  def visit_Name(self,node):
221
267
  if isinstance(node.ctx,ast.Load):
222
- self.add_ref(self.qual(node.id))
223
- if node.id in DYNAMIC_PATTERNS:self.dyn.add(self.mod.split(".")[0])
268
+ for param_name, param_full_name in self.current_function_params:
269
+ if node.id == param_name:
270
+ self.add_ref(param_full_name)
271
+ break
272
+ else:
273
+ # not parameter, handle normally
274
+ self.add_ref(self.qual(node.id))
275
+ if node.id in DYNAMIC_PATTERNS:
276
+ self.dyn.add(self.mod.split(".")[0])
224
277
 
225
- def visit_Attribute(self,node):
278
+ def visit_Attribute(self, node):
226
279
  self.generic_visit(node)
227
- if isinstance(node.ctx,ast.Load)and isinstance(node.value,ast.Name):
280
+ if isinstance(node.ctx, ast.Load) and isinstance(node.value, ast.Name):
281
+ if node.value.id in [param_name for param_name, _ in self.current_function_params]:
282
+ # mark parameter as referenced
283
+ for param_name, param_full_name in self.current_function_params:
284
+ if node.value.id == param_name:
285
+ self.add_ref(param_full_name)
286
+ break
287
+
228
288
  self.add_ref(f"{self.qual(node.value.id)}.{node.attr}")
229
289
 
230
290
  def visit_keyword(self, node):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skylos
3
- Version: 1.0.9
3
+ Version: 1.0.11
4
4
  Summary: A static analysis tool for Python codebases
5
5
  Author-email: oha <aaronoh2015@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -0,0 +1,30 @@
1
+ skylos/__init__.py,sha256=QbF0FyEwDpVFNQtCN9GQCkJGWkj8mU_WVHzFYrpGPvg,152
2
+ skylos/analyzer.py,sha256=CYJSf1jMMEvacFolKXs197fT9FYPFb-D29VtU3vO6vA,10247
3
+ skylos/cli.py,sha256=1ZY95i9pegj62PrwNrwolBRydgO3H4sKF5Wy0TndTrk,14978
4
+ skylos/visitor.py,sha256=P42ob1RKOGhMLh-gHOw5L7fVI0TUxEThFyPbDe6z5D0,11827
5
+ test/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ test/compare_tools.py,sha256=0g9PDeJlbst-7hOaQzrL4MiJFQKpqM8q8VeBGzpPczg,22738
7
+ test/diagnostics.py,sha256=ExuFOCVpc9BDwNYapU96vj9RXLqxji32Sv6wVF4nJYU,13802
8
+ test/test_skylos.py,sha256=kz77STrS4k3Eez5RDYwGxOg2WH3e7zNZPUYEaTLbGTs,15608
9
+ test/test_visitor.py,sha256=bxUY_Zn_gLadZlz_n3Mu6rhVcExqElISwwVBo4eqVAY,7337
10
+ test/pykomodo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
+ test/pykomodo/command_line.py,sha256=3-khuenVjWZjvrvOrr_1K5lMTUmZa-B759B7k77Odpc,7384
12
+ test/pykomodo/config.py,sha256=UddY0sDIRlsZApnlQF70VE7b9KMsOoY_3yqFPtB71jw,453
13
+ test/pykomodo/core.py,sha256=rzoGibPwXr1efAgVtkcvhtM9ZETWWloCVi9weshxa4Y,3841
14
+ test/pykomodo/dashboard.py,sha256=wIvU8aq7vGhcs1bBfwGlXy3AQizmljySf7Of82LmgwI,21688
15
+ test/pykomodo/enhanced_chunker.py,sha256=nRTFSEyAkm4GilWEhJhPAZ67mjg4cEgW8oC6FaYxorY,12062
16
+ test/pykomodo/multi_dirs_chunker.py,sha256=Gz56V3RiBQX5ygeRsukqGRdCcJIb40w2082Xyu3UPOg,29184
17
+ test/pykomodo/pykomodo_config.py,sha256=CquUm_XvCm6XScWv4euyW-DwVgkgzGPHDlL3uB37Fxo,2389
18
+ test/pykomodo/token_chunker.py,sha256=p-zqhMdT_0h-jbIVeKon6sl3BBRNvbv8XAVDJB6qNSE,19530
19
+ test/sample_repo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ test/sample_repo/app.py,sha256=M5XgoAn-LPz50mKAj_ZacRKf-Pg7I4HbjWP7Z9jE4a0,226
21
+ test/sample_repo/sample_repo/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ test/sample_repo/sample_repo/commands.py,sha256=b6gQ9YDabt2yyfqGbOpLo0osF7wya8O4Lm7m8gtCr3g,2575
23
+ test/sample_repo/sample_repo/models.py,sha256=xXIg3pToEZwKuUCmKX2vTlCF_VeFA0yZlvlBVPIy5Qw,3320
24
+ test/sample_repo/sample_repo/routes.py,sha256=8yITrt55BwS01G7nWdESdx8LuxmReqop1zrGUKPeLi8,2475
25
+ test/sample_repo/sample_repo/utils.py,sha256=S56hEYh8wkzwsD260MvQcmUFOkw2EjFU27nMLFE6G2k,1103
26
+ skylos-1.0.11.dist-info/METADATA,sha256=3brmk_HqJmxWAwyyEbzEW-xNC6afwiBb0CyCpnJTNIw,225
27
+ skylos-1.0.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
28
+ skylos-1.0.11.dist-info/entry_points.txt,sha256=zzRpN2ByznlQoLeuLolS_TFNYSQxUGBL1EXQsAd6bIA,43
29
+ skylos-1.0.11.dist-info/top_level.txt,sha256=f8GA_7KwfaEopPMP8-EXDQXaqd4IbsOQPakZy01LkdQ,12
30
+ skylos-1.0.11.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.7.1)
2
+ Generator: setuptools (80.9.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
File without changes
@@ -0,0 +1,176 @@
1
+ import sys
2
+ import argparse
3
+ import os
4
+
5
+ KOMODO_VERSION = "0.2.5"
6
+
7
+ def launch_dashboard():
8
+ """Launch the dashboard interface."""
9
+ try:
10
+ from pykomodo.dashboard import launch_dashboard
11
+ print("Starting Komodo Dashboard...")
12
+ demo = launch_dashboard()
13
+ demo.launch(
14
+ server_name="0.0.0.0",
15
+ server_port=7860,
16
+ share=False,
17
+ debug=False
18
+ )
19
+ except ImportError as e:
20
+ print(f"[Error] Dashboard dependencies not available: {e}", file=sys.stderr)
21
+ print("Please install gradio: pip install gradio", file=sys.stderr)
22
+ sys.exit(1)
23
+ except Exception as e:
24
+ print(f"[Error] Failed to launch dashboard: {e}", file=sys.stderr)
25
+ sys.exit(1)
26
+
27
+ def main():
28
+ """Main entry point for the komodo CLI."""
29
+ parser = argparse.ArgumentParser(
30
+ description="Process and chunk codebase content with advanced chunking strategies."
31
+ )
32
+
33
+ parser.add_argument("--version", action="version", version=f"komodo {KOMODO_VERSION}")
34
+
35
+ parser.add_argument("--dashboard", action="store_true",
36
+ help="Launch the web-based dashboard interface")
37
+
38
+ parser.add_argument("dirs", nargs="*", default=["."],
39
+ help="Directories to process (default: current directory)")
40
+
41
+ chunk_group = parser.add_mutually_exclusive_group(required=False)
42
+ chunk_group.add_argument("--equal-chunks", type=int,
43
+ help="Split into N equal chunks")
44
+ chunk_group.add_argument("--max-chunk-size", type=int,
45
+ help="Maximum tokens/lines per chunk")
46
+ chunk_group.add_argument("--max-tokens", type=int,
47
+ help="Maximum tokens per chunk (token-based chunking)")
48
+
49
+ parser.add_argument("--output-dir", default="chunks",
50
+ help="Output directory for chunks (default: chunks)")
51
+
52
+ parser.add_argument("--ignore", action="append", default=[],
53
+ help="Repeatable. Each usage adds one ignore pattern. Example: --ignore '**/node_modules/**' --ignore 'venv'")
54
+ parser.add_argument("--unignore", action="append", default=[],
55
+ help="Repeatable. Each usage adds one unignore pattern. Example: --unignore '*.md'")
56
+
57
+ parser.add_argument("--dry-run", action="store_true",
58
+ help="Show which files would be processed, but do not generate any chunks.")
59
+
60
+ parser.add_argument("--priority", action="append", default=[],
61
+ help="Priority rules in format 'pattern,score' (repeatable). Example: --priority '*.py,10' --priority 'file2.txt,20'")
62
+
63
+ parser.add_argument("--num-threads", type=int, default=4,
64
+ help="Number of processing threads (default: 4)")
65
+
66
+ parser.add_argument("--enhanced", action="store_true",
67
+ help="Enable LLM optimizations")
68
+
69
+ parser.add_argument("--semantic-chunks", action="store_true",
70
+ help="Use AST-based chunking for .py files (splits by top-level functions/classes)")
71
+
72
+ parser.add_argument("--context-window", type=int, default=4096,
73
+ help="Target LLM context window size (default: 4096)")
74
+ parser.add_argument("--min-relevance", type=float, default=0.3,
75
+ help="Minimum relevance score 0.0-1.0 (default: 0.3)")
76
+ parser.add_argument("--no-metadata", action="store_true",
77
+ help="Disable metadata extraction")
78
+ parser.add_argument("--keep-redundant", action="store_true",
79
+ help="Keep redundant content")
80
+ parser.add_argument("--no-summaries", action="store_true",
81
+ help="Disable summary generation")
82
+
83
+ parser.add_argument("--file-type", type=str,
84
+ help="Only chunk files of this type (e.g., 'pdf', 'py')")
85
+
86
+ parser.add_argument("--verbose", action="store_true",
87
+ help="Enable verbose output")
88
+
89
+ args = parser.parse_args()
90
+
91
+ if args.dashboard:
92
+ launch_dashboard()
93
+ return
94
+
95
+ if not any([args.equal_chunks, args.max_chunk_size, args.max_tokens]):
96
+ parser.error("One of --equal-chunks, --max-chunk-size, or --max-tokens is required (unless using --dashboard)")
97
+
98
+ if args.output_dir:
99
+ os.makedirs(args.output_dir, exist_ok=True)
100
+
101
+ priority_rules = []
102
+ for rule in args.priority:
103
+ if not rule:
104
+ continue
105
+ try:
106
+ pattern, score = rule.split(",", 1)
107
+ priority_rules.append((pattern.strip(), int(score.strip())))
108
+ except ValueError:
109
+ print(f"[Error] Priority rule must be 'pattern,score': {rule}",
110
+ file=sys.stderr)
111
+ sys.exit(1)
112
+
113
+ chunker = None
114
+ try:
115
+ if args.max_tokens:
116
+ try:
117
+ from pykomodo.token_chunker import TokenBasedChunker as ChunkerClass
118
+ if args.verbose:
119
+ print("Using TokenBasedChunker for token-based chunking")
120
+ except ImportError:
121
+ print("[Error] TokenBasedChunker not available. Please install tiktoken or update pykomodo.",
122
+ file=sys.stderr)
123
+ sys.exit(1)
124
+
125
+ chunker_args = {
126
+ "max_tokens_per_chunk": args.max_tokens,
127
+ "output_dir": args.output_dir,
128
+ "user_ignore": args.ignore,
129
+ "user_unignore": args.unignore,
130
+ "priority_rules": priority_rules,
131
+ "num_threads": args.num_threads,
132
+ "dry_run": args.dry_run,
133
+ "semantic_chunking": args.semantic_chunks,
134
+ "file_type": args.file_type,
135
+ "verbose": args.verbose
136
+ }
137
+ else:
138
+ if args.enhanced:
139
+ from pykomodo.enhanced_chunker import EnhancedParallelChunker as ChunkerClass
140
+ else:
141
+ from pykomodo.multi_dirs_chunker import ParallelChunker as ChunkerClass
142
+
143
+ chunker_args = {
144
+ "equal_chunks": args.equal_chunks,
145
+ "max_chunk_size": args.max_chunk_size,
146
+ "output_dir": args.output_dir,
147
+ "user_ignore": args.ignore,
148
+ "user_unignore": args.unignore,
149
+ "priority_rules": priority_rules,
150
+ "num_threads": args.num_threads,
151
+ "dry_run": args.dry_run,
152
+ "semantic_chunking": args.semantic_chunks,
153
+ "file_type": args.file_type
154
+ }
155
+
156
+ if args.enhanced:
157
+ chunker_args.update({
158
+ "extract_metadata": not args.no_metadata,
159
+ "add_summaries": not args.no_summaries,
160
+ "remove_redundancy": not args.keep_redundant,
161
+ "context_window": args.context_window,
162
+ "min_relevance_score": args.min_relevance
163
+ })
164
+
165
+ chunker = ChunkerClass(**chunker_args)
166
+ chunker.process_directories(args.dirs)
167
+
168
+ except Exception as e:
169
+ print(f"[Error] Processing failed: {e}", file=sys.stderr)
170
+ sys.exit(1)
171
+ finally:
172
+ if chunker and hasattr(chunker, 'close'):
173
+ chunker.close()
174
+
175
+ if __name__ == "__main__":
176
+ main()
@@ -0,0 +1,20 @@
1
+ # src/config.py
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ @dataclass
8
+ class PriorityRule:
9
+ pattern: str
10
+ score: int
11
+
12
+ @dataclass
13
+ class KomodoConfig:
14
+ max_size: int = 10 * 1024 * 1024
15
+ token_mode: bool = False
16
+ output_dir: Optional[Path] = None
17
+ stream: bool = False
18
+ ignore_patterns: list[str] = None
19
+ priority_rules: list[PriorityRule] = None
20
+ binary_extensions: list[str] = None