skylos 1.0.10__tar.gz → 1.0.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of skylos might be problematic. Click here for more details.

Files changed (36) hide show
  1. {skylos-1.0.10 → skylos-1.0.11}/PKG-INFO +1 -1
  2. {skylos-1.0.10 → skylos-1.0.11}/README.md +1 -1
  3. {skylos-1.0.10 → skylos-1.0.11}/pyproject.toml +1 -1
  4. {skylos-1.0.10 → skylos-1.0.11}/setup.py +1 -1
  5. {skylos-1.0.10 → skylos-1.0.11}/skylos/__init__.py +1 -1
  6. {skylos-1.0.10 → skylos-1.0.11}/skylos/analyzer.py +14 -2
  7. {skylos-1.0.10 → skylos-1.0.11}/skylos/cli.py +24 -1
  8. {skylos-1.0.10 → skylos-1.0.11}/skylos/visitor.py +76 -16
  9. {skylos-1.0.10 → skylos-1.0.11}/skylos.egg-info/PKG-INFO +1 -1
  10. {skylos-1.0.10 → skylos-1.0.11}/skylos.egg-info/SOURCES.txt +9 -0
  11. skylos-1.0.11/test/pykomodo/command_line.py +176 -0
  12. skylos-1.0.11/test/pykomodo/config.py +20 -0
  13. skylos-1.0.11/test/pykomodo/core.py +121 -0
  14. skylos-1.0.11/test/pykomodo/dashboard.py +608 -0
  15. skylos-1.0.11/test/pykomodo/enhanced_chunker.py +304 -0
  16. skylos-1.0.11/test/pykomodo/multi_dirs_chunker.py +783 -0
  17. skylos-1.0.11/test/pykomodo/pykomodo_config.py +68 -0
  18. skylos-1.0.11/test/pykomodo/token_chunker.py +470 -0
  19. skylos-1.0.11/test/sample_repo/sample_repo/__init__.py +0 -0
  20. {skylos-1.0.10 → skylos-1.0.11}/setup.cfg +0 -0
  21. {skylos-1.0.10 → skylos-1.0.11}/skylos.egg-info/dependency_links.txt +0 -0
  22. {skylos-1.0.10 → skylos-1.0.11}/skylos.egg-info/entry_points.txt +0 -0
  23. {skylos-1.0.10 → skylos-1.0.11}/skylos.egg-info/requires.txt +0 -0
  24. {skylos-1.0.10 → skylos-1.0.11}/skylos.egg-info/top_level.txt +0 -0
  25. {skylos-1.0.10 → skylos-1.0.11}/test/__init__.py +0 -0
  26. {skylos-1.0.10 → skylos-1.0.11}/test/compare_tools.py +0 -0
  27. {skylos-1.0.10 → skylos-1.0.11}/test/diagnostics.py +0 -0
  28. {skylos-1.0.10/test/sample_repo → skylos-1.0.11/test/pykomodo}/__init__.py +0 -0
  29. {skylos-1.0.10/test/sample_repo → skylos-1.0.11/test}/sample_repo/__init__.py +0 -0
  30. {skylos-1.0.10 → skylos-1.0.11}/test/sample_repo/app.py +0 -0
  31. {skylos-1.0.10 → skylos-1.0.11}/test/sample_repo/sample_repo/commands.py +0 -0
  32. {skylos-1.0.10 → skylos-1.0.11}/test/sample_repo/sample_repo/models.py +0 -0
  33. {skylos-1.0.10 → skylos-1.0.11}/test/sample_repo/sample_repo/routes.py +0 -0
  34. {skylos-1.0.10 → skylos-1.0.11}/test/sample_repo/sample_repo/utils.py +0 -0
  35. {skylos-1.0.10 → skylos-1.0.11}/test/test_skylos.py +0 -0
  36. {skylos-1.0.10 → skylos-1.0.11}/test/test_visitor.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skylos
3
- Version: 1.0.10
3
+ Version: 1.0.11
4
4
  Summary: A static analysis tool for Python codebases
5
5
  Author-email: oha <aaronoh2015@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -212,7 +212,7 @@ We welcome contributions! Please read our [Contributing Guidelines](CONTRIBUTING
212
212
  5. Open a Pull Request
213
213
 
214
214
  ## Roadmap
215
-
215
+ - [ ] Add a production flag, to include dead codes that are used in test but not in the actual execution
216
216
  - [ ] Expand our test cases
217
217
  - [ ] Configuration file support
218
218
  - [ ] Custom analysis rules
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "skylos"
7
- version = "1.0.10"
7
+ version = "1.0.11"
8
8
  requires-python = ">=3.9"
9
9
  description = "A static analysis tool for Python codebases"
10
10
  authors = [{name = "oha", email = "aaronoh2015@gmail.com"}]
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="skylos",
5
- version="1.0.10",
5
+ version="1.0.11",
6
6
  packages=find_packages(),
7
7
  python_requires=">=3.9",
8
8
  install_requires=["inquirer>=3.0.0"],
@@ -1,6 +1,6 @@
1
1
  from skylos.analyzer import analyze
2
2
 
3
- __version__ = "1.0.10"
3
+ __version__ = "1.0.11"
4
4
 
5
5
  def debug_test():
6
6
  return "debug-ok"
@@ -97,6 +97,12 @@ class Skylos:
97
97
  if d.simple_name in MAGIC_METHODS or (d.simple_name.startswith("__") and d.simple_name.endswith("__")):
98
98
  d.confidence = 0
99
99
 
100
+ if d.type == "parameter" and d.simple_name in ("self", "cls"):
101
+ d.confidence = 0
102
+
103
+ if d.type != "parameter" and (d.simple_name in MAGIC_METHODS or (d.simple_name.startswith("__") and d.simple_name.endswith("__"))):
104
+ d.confidence = 0
105
+
100
106
  if not d.simple_name.startswith("_") and d.type in ("function", "method", "class"):
101
107
  d.confidence = min(d.confidence, 90)
102
108
 
@@ -106,6 +112,9 @@ class Skylos:
106
112
  if d.name.split(".")[0] in self.dynamic:
107
113
  d.confidence = min(d.confidence, 60)
108
114
 
115
+ if d.type == "variable" and d.simple_name == "_":
116
+ d.confidence = 0
117
+
109
118
  if d.type == "method" and TEST_METHOD_PATTERN.match(d.simple_name):
110
119
  class_name = d.name.rsplit(".", 1)[0]
111
120
  class_simple_name = class_name.split(".")[-1]
@@ -134,7 +143,7 @@ class Skylos:
134
143
  self._mark_refs()
135
144
  self._apply_heuristics()
136
145
  self._mark_exports()
137
-
146
+
138
147
  thr = max(0, thr)
139
148
 
140
149
  unused = []
@@ -146,7 +155,8 @@ class Skylos:
146
155
  "unused_functions": [],
147
156
  "unused_imports": [],
148
157
  "unused_classes": [],
149
- "unused_variables": []
158
+ "unused_variables": [],
159
+ "unused_parameters": []
150
160
  }
151
161
 
152
162
  for u in unused:
@@ -158,6 +168,8 @@ class Skylos:
158
168
  result["unused_classes"].append(u)
159
169
  elif u["type"] == "variable":
160
170
  result["unused_variables"].append(u)
171
+ elif u["type"] == "parameter":
172
+ result["unused_parameters"].append(u)
161
173
 
162
174
  return json.dumps(result, indent=2)
163
175
 
@@ -247,6 +247,8 @@ def main() -> None:
247
247
 
248
248
  unused_functions = result.get("unused_functions", [])
249
249
  unused_imports = result.get("unused_imports", [])
250
+ unused_parameters = result.get("unused_parameters", [])
251
+ unused_variables = result.get("unused_variables", [])
250
252
 
251
253
  logger.info(f"{Colors.CYAN}{Colors.BOLD}🔍 Python Static Analysis Results{Colors.RESET}")
252
254
  logger.info(f"{Colors.CYAN}{'=' * 35}{Colors.RESET}")
@@ -254,7 +256,10 @@ def main() -> None:
254
256
  logger.info(f"\n{Colors.BOLD}Summary:{Colors.RESET}")
255
257
  logger.info(f" • Unreachable functions: {Colors.YELLOW}{len(unused_functions)}{Colors.RESET}")
256
258
  logger.info(f" • Unused imports: {Colors.YELLOW}{len(unused_imports)}{Colors.RESET}")
257
-
259
+ logger.info(f" • Unused parameters: {Colors.YELLOW}{len(unused_parameters)}{Colors.RESET}")
260
+ logger.info(f" • Unused variables: {Colors.YELLOW}{len(unused_variables)}{Colors.RESET}")
261
+
262
+
258
263
  if args.interactive and (unused_functions or unused_imports):
259
264
  logger.info(f"\n{Colors.BOLD}Interactive Mode:{Colors.RESET}")
260
265
  selected_functions, selected_imports = interactive_selection(logger, unused_functions, unused_imports)
@@ -324,6 +329,24 @@ def main() -> None:
324
329
  else:
325
330
  logger.info(f"\n{Colors.GREEN}✓ All imports are being used!{Colors.RESET}")
326
331
 
332
+ if unused_parameters:
333
+ logger.info(f"\n{Colors.BLUE}{Colors.BOLD}🔧 Unused Parameters{Colors.RESET}")
334
+ logger.info(f"{Colors.BLUE}{'=' * 18}{Colors.RESET}")
335
+ for i, item in enumerate(unused_parameters, 1):
336
+ logger.info(f"{Colors.GRAY}{i:2d}. {Colors.RESET}{Colors.BLUE}{item['name']}{Colors.RESET}")
337
+ logger.info(f" {Colors.GRAY}└─ {item['file']}:{item['line']}{Colors.RESET}")
338
+ else:
339
+ logger.info(f"\n{Colors.GREEN}✓ All parameters are being used!{Colors.RESET}")
340
+
341
+ if unused_variables:
342
+ logger.info(f"\n{Colors.YELLOW}{Colors.BOLD}📊 Unused Variables{Colors.RESET}")
343
+ logger.info(f"{Colors.YELLOW}{'=' * 18}{Colors.RESET}")
344
+ for i, item in enumerate(unused_variables, 1):
345
+ logger.info(f"{Colors.GRAY}{i:2d}. {Colors.RESET}{Colors.YELLOW}{item['name']}{Colors.RESET}")
346
+ logger.info(f" {Colors.GRAY}└─ {item['file']}:{item['line']}{Colors.RESET}")
347
+ else:
348
+ logger.info(f"\n{Colors.GREEN}✓ All variables are being used!{Colors.RESET}")
349
+
327
350
  dead_code_count = len(unused_functions) + len(unused_imports)
328
351
  print_badge(dead_code_count, logger)
329
352
 
@@ -52,6 +52,7 @@ class Visitor(ast.NodeVisitor):
52
52
  self.dyn=set()
53
53
  self.exports=set()
54
54
  self.current_function_scope = []
55
+ self.current_function_params = []
55
56
 
56
57
  def add_def(self,n,t,l):
57
58
  if n not in{d.name for d in self.defs}:self.defs.append(Definition(n,t,self.file,l))
@@ -85,17 +86,27 @@ class Visitor(ast.NodeVisitor):
85
86
  self.alias[a.asname or a.name.split(".")[-1]]=full
86
87
  self.add_def(full,"import",node.lineno)
87
88
 
88
- def visit_ImportFrom(self,node):
89
- if node.module is None:return
89
+ def visit_ImportFrom(self, node):
90
+ if node.module is None:
91
+ return
90
92
  for a in node.names:
91
- if a.name=="*":continue
92
- base=node.module
93
+ if a.name == "*":
94
+ continue
95
+ base = node.module
93
96
  if node.level:
94
- parts=self.mod.split(".")
95
- base=".".join(parts[:-node.level])+(f".{node.module}"if node.module else"")
96
- full=f"{base}.{a.name}"
97
- self.alias[a.asname or a.name]=full
98
- self.add_def(full,"import",node.lineno)
97
+ parts = self.mod.split(".")
98
+ base = ".".join(parts[:-node.level]) + (f".{node.module}" if node.module else "")
99
+
100
+ full = f"{base}.{a.name}"
101
+
102
+ if a.asname:
103
+ alias_full = f"{self.mod}.{a.asname}" if self.mod else a.asname
104
+ self.add_def(alias_full, "import", node.lineno)
105
+ self.alias[a.asname] = full
106
+ self.add_ref(full)
107
+ else:
108
+ self.alias[a.name] = full
109
+ self.add_def(full, "import", node.lineno)
99
110
 
100
111
  def visit_arguments(self, args):
101
112
  for arg in args.args:
@@ -128,15 +139,25 @@ class Visitor(ast.NodeVisitor):
128
139
 
129
140
  self.current_function_scope.append(node.name)
130
141
 
142
+ old_params = self.current_function_params
143
+ self.current_function_params = []
144
+
131
145
  for d_node in node.decorator_list:
132
146
  self.visit(d_node)
133
147
 
148
+ for arg in node.args.args:
149
+ param_name = f"{qualified_name}.{arg.arg}"
150
+ self.add_def(param_name, "parameter", node.lineno)
151
+ self.current_function_params.append((arg.arg, param_name))
152
+
134
153
  self.visit_arguments(node.args)
135
154
  self.visit_annotation(node.returns)
136
155
 
137
156
  for stmt in node.body:
138
157
  self.visit(stmt)
158
+
139
159
  self.current_function_scope.pop()
160
+ self.current_function_params = old_params
140
161
 
141
162
  visit_AsyncFunctionDef=visit_FunctionDef
142
163
 
@@ -178,6 +199,30 @@ class Visitor(ast.NodeVisitor):
178
199
  self.visit(node.step)
179
200
 
180
201
  def visit_Assign(self, node):
202
+ def process_target_for_def(target_node):
203
+ if isinstance(target_node, ast.Name):
204
+ var_name_simple = target_node.id
205
+ if var_name_simple == "__all__" and not self.current_function_scope and not self.cls:
206
+ return
207
+
208
+ scope_parts = [self.mod]
209
+ if self.cls:
210
+ scope_parts.append(self.cls)
211
+ if self.current_function_scope:
212
+ scope_parts.extend(self.current_function_scope)
213
+
214
+ prefix = '.'.join(filter(None, scope_parts))
215
+ qualified_var_name = f"{prefix}.{var_name_simple}" if prefix else var_name_simple
216
+
217
+ self.add_def(qualified_var_name, "variable", target_node.lineno)
218
+
219
+ elif isinstance(target_node, (ast.Tuple, ast.List)):
220
+ for elt in target_node.elts:
221
+ process_target_for_def(elt)
222
+
223
+ for t in node.targets:
224
+ process_target_for_def(t)
225
+
181
226
  for target in node.targets:
182
227
  if isinstance(target, ast.Name) and target.id == "__all__":
183
228
  if isinstance(node.value, (ast.List, ast.Tuple)):
@@ -189,9 +234,10 @@ class Visitor(ast.NodeVisitor):
189
234
  value = elt.s
190
235
 
191
236
  if value is not None:
192
- full_name = f"{self.mod}.{value}"
193
- self.add_ref(full_name)
194
- self.add_ref(value)
237
+ full_name_export = f"{self.mod}.{value}" if self.mod else value
238
+ self.add_ref(full_name_export)
239
+ self.add_ref(value)
240
+
195
241
  self.generic_visit(node)
196
242
 
197
243
  def visit_Call(self, node):
@@ -219,12 +265,26 @@ class Visitor(ast.NodeVisitor):
219
265
 
220
266
  def visit_Name(self,node):
221
267
  if isinstance(node.ctx,ast.Load):
222
- self.add_ref(self.qual(node.id))
223
- if node.id in DYNAMIC_PATTERNS:self.dyn.add(self.mod.split(".")[0])
268
+ for param_name, param_full_name in self.current_function_params:
269
+ if node.id == param_name:
270
+ self.add_ref(param_full_name)
271
+ break
272
+ else:
273
+ # not parameter, handle normally
274
+ self.add_ref(self.qual(node.id))
275
+ if node.id in DYNAMIC_PATTERNS:
276
+ self.dyn.add(self.mod.split(".")[0])
224
277
 
225
- def visit_Attribute(self,node):
278
+ def visit_Attribute(self, node):
226
279
  self.generic_visit(node)
227
- if isinstance(node.ctx,ast.Load)and isinstance(node.value,ast.Name):
280
+ if isinstance(node.ctx, ast.Load) and isinstance(node.value, ast.Name):
281
+ if node.value.id in [param_name for param_name, _ in self.current_function_params]:
282
+ # mark parameter as referenced
283
+ for param_name, param_full_name in self.current_function_params:
284
+ if node.value.id == param_name:
285
+ self.add_ref(param_full_name)
286
+ break
287
+
228
288
  self.add_ref(f"{self.qual(node.value.id)}.{node.attr}")
229
289
 
230
290
  def visit_keyword(self, node):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: skylos
3
- Version: 1.0.10
3
+ Version: 1.0.11
4
4
  Summary: A static analysis tool for Python codebases
5
5
  Author-email: oha <aaronoh2015@gmail.com>
6
6
  Requires-Python: >=3.9
@@ -16,6 +16,15 @@ test/compare_tools.py
16
16
  test/diagnostics.py
17
17
  test/test_skylos.py
18
18
  test/test_visitor.py
19
+ test/pykomodo/__init__.py
20
+ test/pykomodo/command_line.py
21
+ test/pykomodo/config.py
22
+ test/pykomodo/core.py
23
+ test/pykomodo/dashboard.py
24
+ test/pykomodo/enhanced_chunker.py
25
+ test/pykomodo/multi_dirs_chunker.py
26
+ test/pykomodo/pykomodo_config.py
27
+ test/pykomodo/token_chunker.py
19
28
  test/sample_repo/__init__.py
20
29
  test/sample_repo/app.py
21
30
  test/sample_repo/sample_repo/__init__.py
@@ -0,0 +1,176 @@
1
+ import sys
2
+ import argparse
3
+ import os
4
+
5
+ KOMODO_VERSION = "0.2.5"
6
+
7
def launch_dashboard():
    """Start the web-based Komodo dashboard and block until it exits.

    Exits the process with status 1 when the dashboard stack (gradio) is
    missing, or when startup fails for any other reason.
    """
    try:
        # Imported lazily so the rest of the CLI works without the
        # dashboard extras installed.
        from pykomodo.dashboard import launch_dashboard as build_dashboard
        print("Starting Komodo Dashboard...")
        app = build_dashboard()
        launch_options = {
            "server_name": "0.0.0.0",
            "server_port": 7860,
            "share": False,
            "debug": False,
        }
        app.launch(**launch_options)
    except ImportError as e:
        print(f"[Error] Dashboard dependencies not available: {e}", file=sys.stderr)
        print("Please install gradio: pip install gradio", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"[Error] Failed to launch dashboard: {e}", file=sys.stderr)
        sys.exit(1)
26
+
27
def _build_parser():
    """Build the argparse parser for the komodo CLI (single source of truth)."""
    parser = argparse.ArgumentParser(
        description="Process and chunk codebase content with advanced chunking strategies."
    )

    parser.add_argument("--version", action="version", version=f"komodo {KOMODO_VERSION}")

    parser.add_argument("--dashboard", action="store_true",
                        help="Launch the web-based dashboard interface")

    parser.add_argument("dirs", nargs="*", default=["."],
                        help="Directories to process (default: current directory)")

    # At most one chunking strategy; "required" is enforced later so that
    # --dashboard can run without choosing one.
    chunk_group = parser.add_mutually_exclusive_group(required=False)
    chunk_group.add_argument("--equal-chunks", type=int,
                             help="Split into N equal chunks")
    chunk_group.add_argument("--max-chunk-size", type=int,
                             help="Maximum tokens/lines per chunk")
    chunk_group.add_argument("--max-tokens", type=int,
                             help="Maximum tokens per chunk (token-based chunking)")

    parser.add_argument("--output-dir", default="chunks",
                        help="Output directory for chunks (default: chunks)")

    parser.add_argument("--ignore", action="append", default=[],
                        help="Repeatable. Each usage adds one ignore pattern. Example: --ignore '**/node_modules/**' --ignore 'venv'")
    parser.add_argument("--unignore", action="append", default=[],
                        help="Repeatable. Each usage adds one unignore pattern. Example: --unignore '*.md'")

    parser.add_argument("--dry-run", action="store_true",
                        help="Show which files would be processed, but do not generate any chunks.")

    parser.add_argument("--priority", action="append", default=[],
                        help="Priority rules in format 'pattern,score' (repeatable). Example: --priority '*.py,10' --priority 'file2.txt,20'")

    parser.add_argument("--num-threads", type=int, default=4,
                        help="Number of processing threads (default: 4)")

    parser.add_argument("--enhanced", action="store_true",
                        help="Enable LLM optimizations")

    parser.add_argument("--semantic-chunks", action="store_true",
                        help="Use AST-based chunking for .py files (splits by top-level functions/classes)")

    parser.add_argument("--context-window", type=int, default=4096,
                        help="Target LLM context window size (default: 4096)")
    parser.add_argument("--min-relevance", type=float, default=0.3,
                        help="Minimum relevance score 0.0-1.0 (default: 0.3)")
    parser.add_argument("--no-metadata", action="store_true",
                        help="Disable metadata extraction")
    parser.add_argument("--keep-redundant", action="store_true",
                        help="Keep redundant content")
    parser.add_argument("--no-summaries", action="store_true",
                        help="Disable summary generation")

    parser.add_argument("--file-type", type=str,
                        help="Only chunk files of this type (e.g., 'pdf', 'py')")

    parser.add_argument("--verbose", action="store_true",
                        help="Enable verbose output")

    return parser


def _parse_priority_rules(raw_rules):
    """Parse 'pattern,score' strings into (pattern, int_score) tuples.

    Exits the process with status 1 on a malformed rule, matching the
    CLI's existing error convention.
    """
    priority_rules = []
    for rule in raw_rules:
        if not rule:
            continue
        try:
            pattern, score = rule.split(",", 1)
            priority_rules.append((pattern.strip(), int(score.strip())))
        except ValueError:
            print(f"[Error] Priority rule must be 'pattern,score': {rule}",
                  file=sys.stderr)
            sys.exit(1)
    return priority_rules


def _build_chunker(args, priority_rules):
    """Select and construct the chunker implementation for the parsed args.

    Import/constructor errors propagate to the caller, except the
    missing-TokenBasedChunker case, which exits with status 1 directly.
    """
    if args.max_tokens:
        try:
            from pykomodo.token_chunker import TokenBasedChunker as ChunkerClass
            if args.verbose:
                print("Using TokenBasedChunker for token-based chunking")
        except ImportError:
            print("[Error] TokenBasedChunker not available. Please install tiktoken or update pykomodo.",
                  file=sys.stderr)
            sys.exit(1)

        chunker_args = {
            "max_tokens_per_chunk": args.max_tokens,
            "output_dir": args.output_dir,
            "user_ignore": args.ignore,
            "user_unignore": args.unignore,
            "priority_rules": priority_rules,
            "num_threads": args.num_threads,
            "dry_run": args.dry_run,
            "semantic_chunking": args.semantic_chunks,
            "file_type": args.file_type,
            "verbose": args.verbose
        }
    else:
        if args.enhanced:
            from pykomodo.enhanced_chunker import EnhancedParallelChunker as ChunkerClass
        else:
            from pykomodo.multi_dirs_chunker import ParallelChunker as ChunkerClass

        chunker_args = {
            "equal_chunks": args.equal_chunks,
            "max_chunk_size": args.max_chunk_size,
            "output_dir": args.output_dir,
            "user_ignore": args.ignore,
            "user_unignore": args.unignore,
            "priority_rules": priority_rules,
            "num_threads": args.num_threads,
            "dry_run": args.dry_run,
            "semantic_chunking": args.semantic_chunks,
            "file_type": args.file_type
        }

        if args.enhanced:
            # NOTE(review): the LLM-optimization kwargs are added in this
            # branch only — presumably only EnhancedParallelChunker accepts
            # them; TokenBasedChunker's signature does not list them above.
            chunker_args.update({
                "extract_metadata": not args.no_metadata,
                "add_summaries": not args.no_summaries,
                "remove_redundancy": not args.keep_redundant,
                "context_window": args.context_window,
                "min_relevance_score": args.min_relevance
            })

    return ChunkerClass(**chunker_args)


def main():
    """Main entry point for the komodo CLI.

    Parses the command line, optionally launches the dashboard, then builds
    the appropriate chunker and processes the requested directories.
    Exits with status 1 on any processing failure.
    """
    parser = _build_parser()
    args = parser.parse_args()

    if args.dashboard:
        launch_dashboard()
        return

    if not any([args.equal_chunks, args.max_chunk_size, args.max_tokens]):
        parser.error("One of --equal-chunks, --max-chunk-size, or --max-tokens is required (unless using --dashboard)")

    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)

    priority_rules = _parse_priority_rules(args.priority)

    chunker = None
    try:
        chunker = _build_chunker(args, priority_rules)
        chunker.process_directories(args.dirs)
    except Exception as e:
        print(f"[Error] Processing failed: {e}", file=sys.stderr)
        sys.exit(1)
    finally:
        # Release chunker resources even when processing fails.
        if chunker and hasattr(chunker, 'close'):
            chunker.close()
174
+
175
# Script entry point: run the CLI when this module is executed directly.
if __name__ == "__main__":
    main()
@@ -0,0 +1,20 @@
1
+ # src/config.py
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
@dataclass
class PriorityRule:
    """A glob *pattern* paired with an integer priority *score*."""
    pattern: str   # glob-style pattern, e.g. "*.py"
    score: int     # higher score means higher priority


@dataclass
class KomodoConfig:
    """Configuration container for a chunking run.

    The list-valued fields default to ``None`` ("unset") rather than a
    shared mutable list; treat ``None`` as "no entries configured".
    """
    max_size: int = 10 * 1024 * 1024       # maximum file size in bytes (10 MiB)
    token_mode: bool = False               # chunk by tokens instead of lines
    output_dir: Optional[Path] = None      # where chunk output is written
    stream: bool = False                   # stream output instead of buffering
    # Annotations fixed to Optional: the previous `list[str] = None` form was
    # inconsistent (a None default is not a list). Defaults are unchanged.
    ignore_patterns: Optional[list[str]] = None
    priority_rules: Optional[list[PriorityRule]] = None
    binary_extensions: Optional[list[str]] = None
@@ -0,0 +1,121 @@
1
+ import os
2
+ import fnmatch
3
+ from typing import List, Optional
4
+
5
class PriorityRule:
    """
    Simple Python container for (pattern, score).
    """
    def __init__(self, pattern, score):
        self.pattern: str = pattern
        self.score: int = score


class PyCConfig:
    """
    Pure-Python stand-in for the Cython-era 'PyCConfig' wrapper around the
    'CConfig' struct: the same conceptual fields, expressed with ordinary
    Python lists, strings and booleans.
    """

    def __init__(self):
        # Size/behavior knobs.
        self.max_size: int = 0
        self.token_mode: bool = False
        self.output_dir: Optional[str] = None
        self.stream: bool = False

        # Pattern lists consulted by should_ignore()/calculate_priority().
        self.ignore_patterns: List[str] = []
        self.unignore_patterns: List[str] = []
        self.priority_rules: List[PriorityRule] = []
        self.binary_exts: List[str] = []

    def add_ignore_pattern(self, pattern: str) -> None:
        """Register a glob pattern whose matches should be ignored."""
        self.ignore_patterns.append(pattern)

    def add_unignore_pattern(self, pattern: str) -> None:
        """Register a glob pattern that overrides the ignore list."""
        self.unignore_patterns.append(pattern)

    def add_priority_rule(self, pattern: str, score: int) -> None:
        """Register a (pattern, score) priority rule."""
        self.priority_rules.append(PriorityRule(pattern, score))

    def should_ignore(self, path: str) -> bool:
        """
        True when *path* matches an ignore pattern, unless an unignore
        pattern matches first (unignore wins).
        """
        if any(fnmatch.fnmatch(path, pat) for pat in self.unignore_patterns):
            return False
        return any(fnmatch.fnmatch(path, pat) for pat in self.ignore_patterns)

    def calculate_priority(self, path: str) -> int:
        """Return the highest score among matching priority rules (0 if none)."""
        best = 0
        for rule in self.priority_rules:
            if fnmatch.fnmatch(path, rule.pattern):
                best = max(best, rule.score)
        return best

    def is_binary_file(self, path: str) -> bool:
        """
        Heuristic binary check:
        1) extension listed in self.binary_exts -> True
        2) a NUL byte within the first 512 bytes -> True
        3) file cannot be opened -> True
        """
        ext = os.path.splitext(path)[1].lstrip(".").lower()
        if ext in (known.lower() for known in self.binary_exts):
            return True

        try:
            with open(path, "rb") as handle:
                head = handle.read(512)
        except OSError:
            return True

        return b"\0" in head

    def read_file_contents(self, path: str) -> str:
        """
        Return the file's contents decoded as UTF-8 (bad bytes replaced),
        or "<NULL>" when the file cannot be opened.
        """
        try:
            with open(path, "rb") as handle:
                raw = handle.read()
        except OSError:
            return "<NULL>"
        return raw.decode("utf-8", errors="replace")

    def count_tokens(self, text: str) -> int:
        """
        Replicates py_count_tokens: simple whitespace-based token counting.
        """
        return len(text.split())

    def make_c_string(self, text: Optional[str]) -> str:
        """Map None to the sentinel "<NULL>"; pass other strings through."""
        return "<NULL>" if text is None else text

    def __repr__(self) -> str:
        rule_pairs = [(rule.pattern, rule.score) for rule in self.priority_rules]
        return (
            f"PyCConfig(max_size={self.max_size}, token_mode={self.token_mode}, "
            f"output_dir={self.output_dir!r}, stream={self.stream}, "
            f"ignore_patterns={self.ignore_patterns}, "
            f"unignore_patterns={self.unignore_patterns}, "
            f"priority_rules={rule_pairs}, "
            f"binary_exts={self.binary_exts})"
        )