hdsp-jupyter-extension 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121) hide show
  1. agent_server/__init__.py +8 -0
  2. agent_server/core/__init__.py +92 -0
  3. agent_server/core/api_key_manager.py +427 -0
  4. agent_server/core/code_validator.py +1238 -0
  5. agent_server/core/context_condenser.py +308 -0
  6. agent_server/core/embedding_service.py +254 -0
  7. agent_server/core/error_classifier.py +577 -0
  8. agent_server/core/llm_client.py +95 -0
  9. agent_server/core/llm_service.py +649 -0
  10. agent_server/core/notebook_generator.py +274 -0
  11. agent_server/core/prompt_builder.py +35 -0
  12. agent_server/core/rag_manager.py +742 -0
  13. agent_server/core/reflection_engine.py +489 -0
  14. agent_server/core/retriever.py +248 -0
  15. agent_server/core/state_verifier.py +452 -0
  16. agent_server/core/summary_generator.py +484 -0
  17. agent_server/core/task_manager.py +198 -0
  18. agent_server/knowledge/__init__.py +9 -0
  19. agent_server/knowledge/watchdog_service.py +352 -0
  20. agent_server/main.py +160 -0
  21. agent_server/prompts/__init__.py +60 -0
  22. agent_server/prompts/file_action_prompts.py +113 -0
  23. agent_server/routers/__init__.py +9 -0
  24. agent_server/routers/agent.py +591 -0
  25. agent_server/routers/chat.py +188 -0
  26. agent_server/routers/config.py +100 -0
  27. agent_server/routers/file_resolver.py +293 -0
  28. agent_server/routers/health.py +42 -0
  29. agent_server/routers/rag.py +163 -0
  30. agent_server/schemas/__init__.py +60 -0
  31. hdsp_agent_core/__init__.py +158 -0
  32. hdsp_agent_core/factory.py +252 -0
  33. hdsp_agent_core/interfaces.py +203 -0
  34. hdsp_agent_core/knowledge/__init__.py +31 -0
  35. hdsp_agent_core/knowledge/chunking.py +356 -0
  36. hdsp_agent_core/knowledge/libraries/dask.md +188 -0
  37. hdsp_agent_core/knowledge/libraries/matplotlib.md +164 -0
  38. hdsp_agent_core/knowledge/libraries/polars.md +68 -0
  39. hdsp_agent_core/knowledge/loader.py +337 -0
  40. hdsp_agent_core/llm/__init__.py +13 -0
  41. hdsp_agent_core/llm/service.py +556 -0
  42. hdsp_agent_core/managers/__init__.py +22 -0
  43. hdsp_agent_core/managers/config_manager.py +133 -0
  44. hdsp_agent_core/managers/session_manager.py +251 -0
  45. hdsp_agent_core/models/__init__.py +115 -0
  46. hdsp_agent_core/models/agent.py +316 -0
  47. hdsp_agent_core/models/chat.py +41 -0
  48. hdsp_agent_core/models/common.py +95 -0
  49. hdsp_agent_core/models/rag.py +368 -0
  50. hdsp_agent_core/prompts/__init__.py +63 -0
  51. hdsp_agent_core/prompts/auto_agent_prompts.py +1260 -0
  52. hdsp_agent_core/prompts/cell_action_prompts.py +98 -0
  53. hdsp_agent_core/services/__init__.py +18 -0
  54. hdsp_agent_core/services/agent_service.py +438 -0
  55. hdsp_agent_core/services/chat_service.py +205 -0
  56. hdsp_agent_core/services/rag_service.py +262 -0
  57. hdsp_agent_core/tests/__init__.py +1 -0
  58. hdsp_agent_core/tests/conftest.py +102 -0
  59. hdsp_agent_core/tests/test_factory.py +251 -0
  60. hdsp_agent_core/tests/test_services.py +326 -0
  61. hdsp_jupyter_extension-2.0.0.data/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +7 -0
  62. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/build_log.json +738 -0
  63. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/install.json +5 -0
  64. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/package.json +134 -0
  65. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2607ff74c74acfa83158.js +4369 -0
  66. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2607ff74c74acfa83158.js.map +1 -0
  67. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.622c1a5918b3aafb2315.js +12496 -0
  68. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.622c1a5918b3aafb2315.js.map +1 -0
  69. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +94 -0
  70. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +1 -0
  71. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +94 -0
  72. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +1 -0
  73. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.dae97cde171e13b8c834.js +623 -0
  74. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.dae97cde171e13b8c834.js.map +1 -0
  75. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/style.js +4 -0
  76. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +507 -0
  77. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +1 -0
  78. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js +2071 -0
  79. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +1 -0
  80. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js +1059 -0
  81. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +1 -0
  82. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +376 -0
  83. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +1 -0
  84. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +60336 -0
  85. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +1 -0
  86. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js +7132 -0
  87. hdsp_jupyter_extension-2.0.0.data/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +1 -0
  88. hdsp_jupyter_extension-2.0.0.dist-info/METADATA +152 -0
  89. hdsp_jupyter_extension-2.0.0.dist-info/RECORD +121 -0
  90. hdsp_jupyter_extension-2.0.0.dist-info/WHEEL +4 -0
  91. hdsp_jupyter_extension-2.0.0.dist-info/licenses/LICENSE +21 -0
  92. jupyter_ext/__init__.py +233 -0
  93. jupyter_ext/_version.py +4 -0
  94. jupyter_ext/config.py +111 -0
  95. jupyter_ext/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +7 -0
  96. jupyter_ext/handlers.py +632 -0
  97. jupyter_ext/labextension/build_log.json +738 -0
  98. jupyter_ext/labextension/package.json +134 -0
  99. jupyter_ext/labextension/static/frontend_styles_index_js.2607ff74c74acfa83158.js +4369 -0
  100. jupyter_ext/labextension/static/frontend_styles_index_js.2607ff74c74acfa83158.js.map +1 -0
  101. jupyter_ext/labextension/static/lib_index_js.622c1a5918b3aafb2315.js +12496 -0
  102. jupyter_ext/labextension/static/lib_index_js.622c1a5918b3aafb2315.js.map +1 -0
  103. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +94 -0
  104. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +1 -0
  105. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +94 -0
  106. jupyter_ext/labextension/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +1 -0
  107. jupyter_ext/labextension/static/remoteEntry.dae97cde171e13b8c834.js +623 -0
  108. jupyter_ext/labextension/static/remoteEntry.dae97cde171e13b8c834.js.map +1 -0
  109. jupyter_ext/labextension/static/style.js +4 -0
  110. jupyter_ext/labextension/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +507 -0
  111. jupyter_ext/labextension/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +1 -0
  112. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js +2071 -0
  113. jupyter_ext/labextension/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js-node_modules-782ee5.d9ed8645ef1d311657d8.js.map +1 -0
  114. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js +1059 -0
  115. jupyter_ext/labextension/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.36b49c71871f98d4f549.js.map +1 -0
  116. jupyter_ext/labextension/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +376 -0
  117. jupyter_ext/labextension/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +1 -0
  118. jupyter_ext/labextension/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +60336 -0
  119. jupyter_ext/labextension/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +1 -0
  120. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js +7132 -0
  121. jupyter_ext/labextension/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.2e13df4ea61496e95d45.js.map +1 -0
@@ -0,0 +1,1238 @@
1
+ """
2
+ Code Validator Service
3
+ Ruff/Pyflakes/AST 기반 코드 품질 검증 서비스
4
+
5
+ 실행 전 코드의 문법 오류, 미정의 변수, 미사용 import,
6
+ 코딩 스타일, 보안 취약점 등을 사전 감지
7
+
8
+ 검증 도구:
9
+ - Ruff: 초고속 린터 (Rust 기반, 700+ 규칙)
10
+ - Pyflakes: 미사용 import/변수 감지 (fallback)
11
+ - AST: 구문 분석 및 의존성 추출
12
+ """
13
+
14
+ import ast
15
+ import os
16
+ import re
17
+ import subprocess
18
+ import tempfile
19
+ from dataclasses import dataclass, field
20
+ from enum import Enum
21
+ from io import StringIO
22
+ from typing import Any, Dict, List, Optional, Tuple
23
+
24
+
25
class IssueSeverity(Enum):
    """Severity level attached to a validation issue."""

    # The code is expected to fail when executed.
    ERROR = "error"
    # A potential problem that does not necessarily break execution.
    WARNING = "warning"
    # Informational note only.
    INFO = "info"
31
+
32
+
33
class IssueCategory(Enum):
    """Category of a validation issue."""

    # Syntax error.
    SYNTAX = "syntax"
    # Undefined variable or function.
    UNDEFINED_NAME = "undefined_name"
    # Import that is never used.
    UNUSED_IMPORT = "unused_import"
    # Variable that is never used.
    UNUSED_VARIABLE = "unused_variable"
    # Redefinition of an existing name.
    REDEFINED = "redefined"
    # Import failure.
    IMPORT_ERROR = "import_error"
    # Type-related issue.
    TYPE_ERROR = "type_error"
    # Coding style (Ruff).
    STYLE = "style"
    # Security vulnerability (Ruff).
    SECURITY = "security"
    # Code complexity (Ruff).
    COMPLEXITY = "complexity"
    # Recommended practice (Ruff).
    BEST_PRACTICE = "best_practice"
47
+
48
+
49
@dataclass
class ValidationIssue:
    """A single finding reported by one of the validation passes."""

    severity: IssueSeverity
    category: IssueCategory
    message: str
    line: Optional[int] = None
    column: Optional[int] = None
    code_snippet: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the issue as a plain dict, with enums replaced by their values."""
        payload: Dict[str, Any] = {}
        payload["severity"] = self.severity.value
        payload["category"] = self.category.value
        payload["message"] = self.message
        payload["line"] = self.line
        payload["column"] = self.column
        payload["code_snippet"] = self.code_snippet
        return payload
69
+
70
+
71
@dataclass
class DependencyInfo:
    """Names a piece of code imports, defines and uses."""

    # Modules brought in with ``import X`` (alias name when ``as`` is used).
    imports: List[str] = field(default_factory=list)
    # ``from X import Y`` statements, keyed by module name.
    from_imports: Dict[str, List[str]] = field(default_factory=dict)
    # Variables, functions and classes defined by the code.
    defined_names: List[str] = field(default_factory=list)
    # Names the code reads.
    used_names: List[str] = field(default_factory=list)
    # Names that are read but not defined.
    undefined_names: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict (the containers are not copied)."""
        field_order = (
            "imports",
            "from_imports",
            "defined_names",
            "used_names",
            "undefined_names",
        )
        return {name: getattr(self, name) for name in field_order}
89
+
90
+
91
@dataclass
class ValidationResult:
    """Outcome of a validation run."""

    is_valid: bool
    issues: List[ValidationIssue] = field(default_factory=list)
    dependencies: Optional[DependencyInfo] = None
    has_errors: bool = False
    has_warnings: bool = False
    summary: str = ""
    # Code after automatic fixes were applied, if any.
    fixed_code: Optional[str] = None
    # Number of issues that were fixed automatically.
    fixed_count: int = 0

    def to_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict, serializing nested issues and dependencies."""
        serialized_issues = [issue.to_dict() for issue in self.issues]

        serialized_deps = None
        if self.dependencies:
            serialized_deps = self.dependencies.to_dict()

        return {
            "is_valid": self.is_valid,
            "issues": serialized_issues,
            "dependencies": serialized_deps,
            "has_errors": self.has_errors,
            "has_warnings": self.has_warnings,
            "summary": self.summary,
            "fixed_code": self.fixed_code,
            "fixed_count": self.fixed_count,
        }
115
+
116
+
117
+ class CodeValidator:
118
+ """코드 품질 검증 서비스"""
119
+
120
# Python built-in names (these must never be reported as undefined).
#
# ``__builtins__`` is a CPython implementation detail: it is the ``builtins``
# module in ``__main__`` but a plain dict in imported modules. Calling
# ``dir()`` on the dict would list dict *methods* ("keys", "pop", ...), not
# the built-in names, so in that case the dict's keys are used instead.
BUILTIN_NAMES = set(
    __builtins__ if isinstance(__builtins__, dict) else dir(__builtins__)
)
# Explicit safety net on top of the interpreter-provided names, plus module
# dunders and the variables IPython/Jupyter injects into the namespace.
BUILTIN_NAMES.update(
    {
        # Constants, functions and types
        "True", "False", "None", "print", "len", "range", "str", "int",
        "float", "list", "dict", "set", "tuple", "bool", "type", "object",
        "super", "open", "input", "sorted", "reversed", "enumerate", "zip",
        "map", "filter", "all", "any", "sum", "min", "max", "abs", "round",
        "pow", "divmod", "isinstance", "issubclass", "hasattr", "getattr",
        "setattr", "delattr", "callable", "iter", "next", "id", "hash",
        "repr", "ascii", "bin", "hex", "oct", "ord", "chr", "format", "vars",
        "dir", "help", "locals", "globals", "slice", "frozenset", "bytes",
        "bytearray", "memoryview", "complex", "staticmethod", "classmethod",
        "property", "exec", "eval", "compile", "breakpoint",
        # Exceptions
        "Exception", "BaseException", "ValueError", "TypeError", "KeyError",
        "IndexError", "AttributeError", "ImportError", "RuntimeError",
        "StopIteration", "GeneratorExit", "AssertionError",
        "NotImplementedError", "NameError", "ZeroDivisionError",
        "FileNotFoundError", "IOError", "OSError", "PermissionError",
        "TimeoutError", "ConnectionError", "BrokenPipeError", "MemoryError",
        "RecursionError", "OverflowError", "FloatingPointError",
        "ArithmeticError", "LookupError", "UnicodeError",
        "UnicodeDecodeError", "UnicodeEncodeError", "SyntaxError",
        "IndentationError", "TabError", "SystemError", "SystemExit",
        "KeyboardInterrupt", "BufferError", "EOFError",
        "ModuleNotFoundError", "UnboundLocalError", "ReferenceError",
        "EnvironmentError",
        # Warnings
        "Warning", "UserWarning", "DeprecationWarning",
        "PendingDeprecationWarning", "RuntimeWarning", "SyntaxWarning",
        "FutureWarning", "ImportWarning", "UnicodeWarning", "BytesWarning",
        "ResourceWarning",
        # OS-level exceptions
        "ConnectionAbortedError", "ConnectionRefusedError",
        "ConnectionResetError", "FileExistsError", "IsADirectoryError",
        "NotADirectoryError", "InterruptedError", "ChildProcessError",
        "ProcessLookupError", "BlockingIOError",
        # Module-level dunders
        "__name__", "__file__", "__doc__", "__package__",
        # Jupyter/IPython special variables
        "In", "Out", "_", "__", "___", "get_ipython", "display",
        "_i", "_ii", "_iii", "_ih", "_oh", "_dh",
    }
)
285
+
286
# Common data-science library names and aliases (these must never be
# reported as undefined).
COMMON_LIBRARY_NAMES = {
    # Data processing: pandas, numpy, dask.dataframe, dask.array, xarray
    "pd", "np", "dd", "da", "xr",
    # Visualization: matplotlib, seaborn, plotly
    "plt", "sns", "px", "go", "fig", "ax",
    # Machine learning: tensorflow, pytorch, sklearn
    "tf", "torch", "sk", "nn", "F", "optim",
    # Other libraries
    "scipy", "cv2", "PIL", "Image", "requests", "json", "os", "sys", "re",
    "datetime", "time", "math", "random", "collections", "itertools",
    "functools",
    # Additional common aliases
    "tqdm", "glob", "Path", "pickle", "csv", "io", "logging", "warnings",
    "gc", "subprocess", "shutil", "pathlib", "typing", "copy",
    "multiprocessing",
}
342
+
343
+ def __init__(self, notebook_context: Optional[Dict[str, Any]] = None):
344
+ """
345
+ Args:
346
+ notebook_context: 노트북 컨텍스트 (이전 셀에서 정의된 변수 등)
347
+ """
348
+ self.notebook_context = notebook_context or {}
349
+ self.known_names = set()
350
+ self._init_known_names()
351
+
352
+ def _preprocess_jupyter_code(self, code: str) -> str:
353
+ """Jupyter magic command 전처리 (AST 파싱 전)
354
+
355
+ ! 로 시작하는 셸 명령과 % 로 시작하는 매직 명령을
356
+ pass 문으로 대체하여 AST 파싱이 가능하도록 함
357
+ """
358
+ lines = code.split("\n")
359
+ processed_lines = []
360
+
361
+ for line in lines:
362
+ stripped = line.lstrip()
363
+ # ! 셸 명령어 (예: !pip install, !{sys.executable})
364
+ if stripped.startswith("!"):
365
+ # 들여쓰기 유지하면서 pass로 대체
366
+ indent = len(line) - len(stripped)
367
+ processed_lines.append(" " * indent + "pass # shell command")
368
+ # % 매직 명령어 (예: %matplotlib inline, %%time)
369
+ elif stripped.startswith("%"):
370
+ indent = len(line) - len(stripped)
371
+ processed_lines.append(" " * indent + "pass # magic command")
372
+ else:
373
+ processed_lines.append(line)
374
+
375
+ return "\n".join(processed_lines)
376
+
377
+ def _init_known_names(self):
378
+ """노트북 컨텍스트에서 알려진 이름들 초기화"""
379
+ self.known_names.update(self.BUILTIN_NAMES)
380
+ self.known_names.update(self.COMMON_LIBRARY_NAMES)
381
+
382
+ # 노트북에서 정의된 변수들
383
+ defined_vars = self.notebook_context.get("definedVariables", [])
384
+ self.known_names.update(defined_vars)
385
+
386
+ # 노트북에서 import된 라이브러리들
387
+ imported_libs = self.notebook_context.get("importedLibraries", [])
388
+ self.known_names.update(imported_libs)
389
+
390
def validate_syntax(self, code: str) -> ValidationResult:
    """Check the code for syntax errors by parsing it with ``ast``.

    Jupyter shell and magic lines are neutralized first so they are not
    reported as syntax errors.

    Args:
        code: Source of a notebook cell.

    Returns:
        A result holding at most one ERROR issue (the first syntax error
        found by the parser) and a matching summary.
    """
    processed_code = self._preprocess_jupyter_code(code)

    issues = []
    try:
        ast.parse(processed_code)
    except SyntaxError as exc:
        snippet = exc.text.strip() if exc.text else None
        syntax_issue = ValidationIssue(
            severity=IssueSeverity.ERROR,
            category=IssueCategory.SYNTAX,
            message=f"문법 오류: {exc.msg}",
            line=exc.lineno,
            column=exc.offset,
            code_snippet=snippet,
        )
        issues.append(syntax_issue)

    # Only ERROR issues are ever added above, so any issue means failure.
    has_errors = bool(issues)
    if has_errors:
        summary = f"문법 오류 {len(issues)}개 발견"
    else:
        summary = "문법 오류 없음"

    return ValidationResult(
        is_valid=not has_errors,
        issues=issues,
        has_errors=has_errors,
        has_warnings=False,
        summary=summary,
    )
422
+
423
def analyze_dependencies(self, code: str) -> DependencyInfo:
    """Collect the names a piece of code imports, defines and uses.

    Defined names are gathered from:
      * ``import`` / ``from ... import`` statements (alias name when
        ``as`` is used; the top-level package for dotted imports),
      * function, async function and class definitions,
      * every function or lambda parameter (positional, keyword-only,
        ``*args`` and ``**kwargs``),
      * ``except ... as name`` handlers,
      * every name that is assigned to: plain, annotated and augmented
        assignments, ``for`` and comprehension targets, ``with ... as``
        targets, walrus expressions, including nested or starred
        unpacking.

    Scoping is not modelled: a name defined anywhere in the code counts as
    defined everywhere.

    Args:
        code: Source of a notebook cell.

    Returns:
        The collected dependency information. When the code does not
        parse, an empty ``DependencyInfo`` is returned.
    """
    deps = DependencyInfo()

    # Neutralize Jupyter shell/magic lines before parsing.
    processed_code = self._preprocess_jupyter_code(code)

    try:
        tree = ast.parse(processed_code)
    except SyntaxError:
        return deps

    defined = []
    used = []

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                name = alias.asname or alias.name
                deps.imports.append(name)
                # ``import a.b.c`` binds only the top-level name ``a``.
                defined.append(name.split(".")[0])

        elif isinstance(node, ast.ImportFrom):
            module = node.module or ""
            # Merge with earlier imports from the same module instead of
            # overwriting them.
            names_for_module = deps.from_imports.setdefault(module, [])
            for alias in node.names:
                name = alias.asname or alias.name
                names_for_module.append(name)
                defined.append(name)

        elif isinstance(
            node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
        ):
            defined.append(node.name)

        elif isinstance(node, ast.arg):
            # Covers parameters of both ``def`` functions and lambdas.
            defined.append(node.arg)

        elif isinstance(node, ast.ExceptHandler):
            if node.name:
                defined.append(node.name)

        elif isinstance(node, ast.Name):
            if isinstance(node.ctx, ast.Store):
                # Any binding target, however deeply nested.
                defined.append(node.id)
            elif isinstance(node.ctx, ast.Load):
                used.append(node.id)

    # Remove duplicates while keeping a deterministic, first-seen order.
    deps.defined_names = list(dict.fromkeys(defined))
    deps.used_names = list(dict.fromkeys(used))

    return deps
516
+
517
def check_undefined_names(self, code: str) -> List[ValidationIssue]:
    """Report names that are read but neither defined nor known.

    A name is skipped when it is defined somewhere in the code, is part of
    ``self.known_names``, or starts with an underscore. Each remaining name
    is reported once, at the first occurrence met while walking the tree.

    When the undefined name is the base of an attribute access
    (``xxx.yyy``), it is reported as a WARNING rather than an ERROR: it
    may be a module that still needs importing, and running the code would
    give a more specific error.

    Args:
        code: Source of a notebook cell.

    Returns:
        One issue per undefined name; an empty list when the code does not
        parse.
    """
    # Neutralize Jupyter shell/magic lines before parsing.
    processed_code = self._preprocess_jupyter_code(code)

    try:
        tree = ast.parse(processed_code)
    except SyntaxError:
        return []

    # Names defined by the code itself.
    local_defined = set(self.analyze_dependencies(code).defined_names)

    # Base names of attribute chains: the ``xxx`` in ``xxx.yyy.zzz``.
    attribute_access_names = set()
    for node in ast.walk(tree):
        if not isinstance(node, ast.Attribute):
            continue
        base = node.value
        while isinstance(base, ast.Attribute):
            base = base.value
        if isinstance(base, ast.Name):
            attribute_access_names.add(base.id)

    issues = []
    reported = set()  # each name is reported only once
    for node in ast.walk(tree):
        if not isinstance(node, ast.Name):
            continue
        if not isinstance(node.ctx, ast.Load):
            continue

        name = node.id
        if name in reported:
            continue
        if name in local_defined or name in self.known_names:
            continue
        if name.startswith("_"):
            continue
        reported.add(name)

        if name in attribute_access_names:
            # Looks like a module: an import may simply be missing.
            severity = IssueSeverity.WARNING
            message = f"'{name}'이(가) 정의되지 않았습니다 (모듈 import 필요 가능성)"
        else:
            severity = IssueSeverity.ERROR
            message = f"'{name}'이(가) 정의되지 않았습니다"

        issues.append(
            ValidationIssue(
                severity=severity,
                category=IssueCategory.UNDEFINED_NAME,
                message=message,
                line=node.lineno,
                column=node.col_offset,
            )
        )

    return issues
593
+
594
def check_with_pyflakes(self, code: str) -> List[ValidationIssue]:
    """Run Pyflakes on the code, when Pyflakes is installed.

    Pyflakes' textual warnings are parsed back into issues. For
    "undefined name" warnings:
      * names in ``self.known_names`` are dropped,
      * names used as the base of an attribute access (``xxx.yyy``) are
        reported as WARNING, since a module import may be missing and
        running the code gives a more specific error,
      * everything else is an ERROR.

    Args:
        code: Source of a notebook cell.

    Returns:
        The issues found; an empty list when Pyflakes is not installed or
        fails to run.
    """
    issues = []

    try:
        from pyflakes import api as pyflakes_api
        from pyflakes import reporter as pyflakes_reporter
    except ImportError:
        # Pyflakes is optional: skip this pass when it is missing.
        return issues

    # Neutralize Jupyter shell/magic lines before analysis.
    processed_code = self._preprocess_jupyter_code(code)

    # Base names of attribute chains: the ``xxx`` in ``xxx.yyy.zzz``.
    attribute_access_names = set()
    try:
        tree = ast.parse(processed_code)
    except SyntaxError:
        tree = None
    if tree is not None:
        for node in ast.walk(tree):
            if not isinstance(node, ast.Attribute):
                continue
            base = node.value
            while isinstance(base, ast.Attribute):
                base = base.value
            if isinstance(base, ast.Name):
                attribute_access_names.add(base.id)

    # Capture Pyflakes output.
    warning_stream = StringIO()
    error_stream = StringIO()
    reporter = pyflakes_reporter.Reporter(warning_stream, error_stream)

    try:
        pyflakes_api.check(processed_code, "<code>", reporter)
    except Exception:
        return issues

    for raw_line in warning_stream.getvalue().strip().split("\n"):
        if not raw_line:
            continue

        # Pyflakes output is "<file>:<line>:<col>: <message>"
        # or "<file>:<line>: <message>".
        parts = raw_line.split(":", 3)
        if len(parts) < 3:
            continue
        try:
            line_num = int(parts[1])
        except ValueError:
            continue
        message = parts[-1].strip()
        lowered = message.lower()

        # Defaults for messages that match none of the patterns below.
        category = IssueCategory.UNDEFINED_NAME
        severity = IssueSeverity.WARNING

        if "undefined name" in lowered:
            # Message form: "undefined name 'xxx'".
            match = re.search(r"'([^']+)'", message)
            if match is None:
                severity = IssueSeverity.ERROR
            else:
                undef_name = match.group(1)
                if undef_name in self.known_names:
                    # Known from the notebook context: not an issue.
                    continue
                if undef_name in attribute_access_names:
                    # Module-like usage stays a WARNING.
                    message = f"{message} (모듈 import 필요 가능성)"
                else:
                    severity = IssueSeverity.ERROR
        elif "imported but unused" in lowered:
            category = IssueCategory.UNUSED_IMPORT
            severity = IssueSeverity.WARNING
        elif "assigned to but never used" in lowered:
            category = IssueCategory.UNUSED_VARIABLE
            severity = IssueSeverity.INFO
        elif "redefinition" in lowered:
            category = IssueCategory.REDEFINED
            severity = IssueSeverity.WARNING

        issues.append(
            ValidationIssue(
                severity=severity,
                category=category,
                message=message,
                line=line_num,
            )
        )

    return issues
695
+
696
def check_with_ruff(
    self, code: str, auto_fix: bool = True
) -> Tuple[str, List[ValidationIssue]]:
    """Run Ruff static analysis on ``code`` and optionally apply its auto-fixes.

    The code is first passed through ``self._preprocess_jupyter_code`` and
    written to a temporary ``.py`` file, which Ruff then processes in up to
    two passes:

    1. Only when ``auto_fix`` is true: ``ruff check --fix`` rewrites the
       temporary file in place.
    2. ``ruff check --output-format=json`` reports whatever is still flagged.

    Selected rule families: F (Pyflakes), E/W (pycodestyle), C90 (mccabe),
    S (flake8-bandit) and B (flake8-bugbear). E501 and W292 are ignored.

    Args:
        code: Python source to check.
        auto_fix: When true, let Ruff fix what it can and return the fixed
            source.

    Returns:
        Tuple ``(fixed_code, issues)``:
        - ``fixed_code``: the auto-fixed source, or the original ``code`` when
          nothing was changed, ``auto_fix`` is false, or Ruff is unavailable.
        - ``issues``: diagnostics Ruff still reports after the fix pass.

    This is a best-effort check: if Ruff is not on ``PATH``, times out, or
    produces unparsable output, the issues collected so far are returned and
    no exception propagates from the Ruff invocation.
    """
    import json
    import shutil

    issues: List[ValidationIssue] = []
    fixed_code = code  # default: hand back the caller's code untouched

    ruff_path = shutil.which("ruff")
    if not ruff_path:
        # Ruff is not installed: nothing to fix, nothing to report.
        return fixed_code, issues

    # Preprocess Jupyter magic commands before handing the code to Ruff.
    processed_code = self._preprocess_jupyter_code(code)
    # Remember the original magic-command lines so they can be put back.
    magic_lines = self._extract_magic_lines(code)

    # Rule selection shared by both passes.
    rule_args = ["--select=F,E,W,C90,S,B", "--ignore=E501,W292"]

    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            # Record the path before writing so the ``finally`` clause can
            # remove the file even if the write itself fails.
            temp_path = f.name
            f.write(processed_code)

        # Pass 1: let Ruff rewrite the temporary file in place.
        if auto_fix:
            subprocess.run(
                [ruff_path, "check", temp_path, "--fix", *rule_args],
                capture_output=True,
                text=True,
                timeout=10,
            )

            with open(temp_path, "r", encoding="utf-8") as f:
                fixed_processed_code = f.read()

            if fixed_processed_code != processed_code:
                # NOTE(review): magic lines are restored by their original
                # line index; if a fix added or removed lines above a magic
                # command this may overwrite the wrong line - confirm against
                # _preprocess_jupyter_code.
                fixed_code = self._restore_magic_lines(
                    fixed_processed_code, magic_lines
                )

        # Pass 2: collect what is still flagged (i.e. could not be fixed).
        result = subprocess.run(
            [ruff_path, "check", temp_path, "--output-format=json", *rule_args],
            capture_output=True,
            text=True,
            timeout=10,
        )

        if result.stdout.strip():
            for item in json.loads(result.stdout):
                # Ruff may emit ``"code": null`` for diagnostics that have no
                # rule code (understood to be syntax errors in newer releases
                # - confirm against Ruff docs); ``dict.get`` would hand back
                # None and break the categorisation below. Treat those as the
                # legacy syntax-error code E999.
                code_rule = item.get("code") or "E999"
                message = item.get("message") or ""
                line = (item.get("location") or {}).get("row", 1)

                category, severity = self._categorize_ruff_rule(code_rule)

                # F821 (undefined name): skip names already known from the
                # notebook context.
                if code_rule == "F821":
                    match = re.search(r"`([^`]+)`", message)
                    if match and match.group(1) in self.known_names:
                        continue

                issues.append(
                    ValidationIssue(
                        severity=severity,
                        category=category,
                        message=f"[{code_rule}] {message}",
                        line=line,
                    )
                )

    except Exception:
        # Deliberate best-effort: a Ruff timeout, a missing executable,
        # malformed JSON or any other failure must not break validation.
        # Whatever was collected so far is returned.
        pass
    finally:
        # Always remove the temporary file.
        if temp_path:
            try:
                os.unlink(temp_path)
            except OSError:
                pass

    return fixed_code, issues
834
+
835
+ def _extract_magic_lines(self, code: str) -> Dict[int, str]:
836
+ """원본 코드에서 Jupyter magic command 라인 추출
837
+
838
+ Returns:
839
+ Dict[line_number, original_line] - 0-indexed line numbers
840
+ """
841
+ magic_lines = {}
842
+ for i, line in enumerate(code.split("\n")):
843
+ stripped = line.lstrip()
844
+ if stripped.startswith("!") or stripped.startswith("%"):
845
+ magic_lines[i] = line
846
+ return magic_lines
847
+
848
+ def _restore_magic_lines(
849
+ self, processed_code: str, magic_lines: Dict[int, str]
850
+ ) -> str:
851
+ """전처리된 코드에 원본 magic command 복원"""
852
+ if not magic_lines:
853
+ return processed_code
854
+
855
+ lines = processed_code.split("\n")
856
+ for line_num, original_line in magic_lines.items():
857
+ if line_num < len(lines):
858
+ lines[line_num] = original_line
859
+ return "\n".join(lines)
860
+
861
+ def _count_fixes(self, original: str, fixed: str) -> int:
862
+ """수정된 라인 수 계산"""
863
+ original_lines = original.split("\n")
864
+ fixed_lines = fixed.split("\n")
865
+ count = 0
866
+ for i, (orig, fix) in enumerate(zip(original_lines, fixed_lines)):
867
+ if orig != fix:
868
+ count += 1
869
+ # 라인 수 차이도 고려
870
+ count += abs(len(original_lines) - len(fixed_lines))
871
+ return count
872
+
873
def _categorize_ruff_rule(self, code: str) -> tuple:
    """Translate a Ruff rule code into an ``(IssueCategory, IssueSeverity)`` pair.

    The mapping is driven by the rule prefix (F, E, W, C9, S, B), with a few
    individual codes singled out. Codes with any other prefix fall back to
    ``(STYLE, INFO)``.
    """
    # F: Pyflakes rules
    if code.startswith("F"):
        if code in ("F821", "F822", "F823"):  # undefined name
            return IssueCategory.UNDEFINED_NAME, IssueSeverity.ERROR
        if code == "F401":  # unused import
            return IssueCategory.UNUSED_IMPORT, IssueSeverity.WARNING
        if code == "F841":  # unused variable
            return IssueCategory.UNUSED_VARIABLE, IssueSeverity.INFO
        return IssueCategory.SYNTAX, IssueSeverity.WARNING

    # E: pycodestyle errors; the E9xx range (SyntaxError etc.) is an error
    if code.startswith("E"):
        if code.startswith("E9"):
            return IssueCategory.SYNTAX, IssueSeverity.ERROR
        return IssueCategory.STYLE, IssueSeverity.INFO

    # W: pycodestyle warnings
    if code.startswith("W"):
        return IssueCategory.STYLE, IssueSeverity.INFO

    # C90: mccabe complexity
    if code.startswith("C9"):
        return IssueCategory.COMPLEXITY, IssueSeverity.WARNING

    # S: security (flake8-bandit). S101 (use of assert) is only informational;
    # every other S rule, S102-S107 included, is a warning.
    if code.startswith("S"):
        if code == "S101":
            return IssueCategory.SECURITY, IssueSeverity.INFO
        return IssueCategory.SECURITY, IssueSeverity.WARNING

    # B: flake8-bugbear (bug patterns)
    if code.startswith("B"):
        return IssueCategory.BEST_PRACTICE, IssueSeverity.WARNING

    # Default for any other prefix
    return IssueCategory.STYLE, IssueSeverity.INFO
923
+
924
def full_validation(self, code: str) -> ValidationResult:
    """Run every validation stage on ``code`` and aggregate the findings.

    Stages, in order:
        1. ``validate_syntax`` - if it reports errors, validation stops and
           an invalid result is returned immediately.
        2. ``analyze_dependencies``.
        3. ``check_undefined_names``.
        4. ``check_with_ruff`` (with auto-fix) - issues whose message is not
           already present are added.
        5. ``check_with_pyflakes`` - always run; issues whose message is not
           already present are added.
        6. Names extracted from all UNDEFINED_NAME issues are stored on
           ``dependencies.undefined_names``.

    Returns:
        A ``ValidationResult``. ``is_valid`` is true when no issue has ERROR
        severity. ``fixed_code`` is set only when Ruff changed the code, and
        ``fixed_count`` is the number of changed lines as computed by
        ``_count_fixes``.
    """
    all_issues = []

    # 1. Syntax check
    syntax_result = self.validate_syntax(code)
    all_issues.extend(syntax_result.issues)

    # Do not go any further when the code does not even parse.
    if syntax_result.has_errors:
        return ValidationResult(
            is_valid=False,
            issues=all_issues,
            has_errors=True,
            has_warnings=False,
            summary=f"문법 오류로 인해 검증 중단: {len(all_issues)}개 오류",
        )

    # 2. Dependency analysis
    dependencies = self.analyze_dependencies(code)

    # 3. Undefined-name check
    all_issues.extend(self.check_undefined_names(code))

    # 4. Ruff check with auto-fix
    fixed_code, ruff_issues = self.check_with_ruff(code)

    # Add only the Ruff issues that are not already reported.
    existing_messages = {issue.message for issue in all_issues}
    for issue in ruff_issues:
        # Normalise the message by dropping a leading "[Fxxx]" rule tag.
        base_msg = re.sub(r"\[F\d+\]\s*", "", issue.message)
        if (
            base_msg not in existing_messages
            and issue.message not in existing_messages
        ):
            all_issues.append(issue)
            existing_messages.add(issue.message)

    # 5. Pyflakes check - add only messages not seen so far.
    for issue in self.check_with_pyflakes(code):
        if issue.message not in existing_messages:
            all_issues.append(issue)
            existing_messages.add(issue.message)

    # 6. Collect undefined names from the issue messages. The name is taken
    # from the first '...' pair if the message has a single quote, otherwise
    # from the first `...` pair (the Ruff "[F821] Undefined name `xxx`" form).
    undefined_names = []
    for issue in all_issues:
        if issue.category != IssueCategory.UNDEFINED_NAME:
            continue
        msg = issue.message
        name = None
        if "'" in msg:
            parts = msg.split("'")
            if len(parts) >= 2:
                name = parts[1]
        elif "`" in msg:
            parts = msg.split("`")
            if len(parts) >= 2:
                name = parts[1]
        if name:
            undefined_names.append(name)
    dependencies.undefined_names = list(set(undefined_names))

    # Aggregate severities.
    error_count = sum(
        1 for issue in all_issues if issue.severity == IssueSeverity.ERROR
    )
    warning_count = sum(
        1 for issue in all_issues if issue.severity == IssueSeverity.WARNING
    )
    has_errors = error_count > 0
    has_warnings = warning_count > 0

    # Auto-fix bookkeeping: reuse the shared line-diff helper instead of
    # duplicating its logic here.
    code_was_fixed = fixed_code != code
    fixed_count = self._count_fixes(code, fixed_code) if code_was_fixed else 0

    if has_errors:
        summary = f"검증 실패: {error_count}개 오류, {warning_count}개 경고"
    elif has_warnings:
        if code_was_fixed:
            summary = (
                f"검증 통과 ({fixed_count}개 자동 수정, 경고 {warning_count}개)"
            )
        else:
            summary = f"검증 통과 (경고 {warning_count}개)"
    else:
        if code_was_fixed:
            summary = f"검증 통과 ({fixed_count}개 자동 수정)"
        else:
            summary = "검증 통과"

    return ValidationResult(
        is_valid=not has_errors,
        issues=all_issues,
        dependencies=dependencies,
        has_errors=has_errors,
        has_warnings=has_warnings,
        summary=summary,
        fixed_code=fixed_code if code_was_fixed else None,
        fixed_count=fixed_count,
    )
1042
+
1043
def quick_check(self, code: str) -> Dict[str, Any]:
    """Run ``full_validation`` and flatten its result into a plain dict.

    Returns:
        Dict with keys ``valid``, ``errors``, ``warnings``, ``summary``,
        ``fixedCode`` and ``fixedCount``. ``errors`` and ``warnings`` are
        lists of ``{"message", "line"}`` dicts for the issues of ERROR and
        WARNING severity respectively; other severities are left out.
    """
    outcome = self.full_validation(code)

    errors = []
    warnings = []
    for issue in outcome.issues:
        entry = {"message": issue.message, "line": issue.line}
        if issue.severity == IssueSeverity.ERROR:
            errors.append(entry)
        elif issue.severity == IssueSeverity.WARNING:
            warnings.append(entry)

    payload = {
        "valid": outcome.is_valid,
        "errors": errors,
        "warnings": warnings,
        "summary": outcome.summary,
        "fixedCode": outcome.fixed_code,
        "fixedCount": outcome.fixed_count,
    }
    return payload
1063
+
1064
+
1065
class APIPatternChecker:
    """Detect library-specific API anti-patterns with regular expressions.

    Each anti-pattern is a tuple ``(regex, message)`` or
    ``(regex, message, severity)``; the severity defaults to WARNING when
    omitted. Patterns are only applied for libraries that are detected in the
    code or passed in explicitly by the caller.
    """

    # Dask anti-patterns
    DASK_ANTIPATTERNS = [
        # compute() after head()
        (
            r"\.head\([^)]*\)\.compute\(\)",
            "head()는 이미 pandas DataFrame을 반환합니다. compute() 불필요!",
        ),
        # columns.compute()
        (
            r"\.columns\.compute\(\)",
            "columns는 이미 pandas Index입니다. compute() 불필요!",
        ),
        # dtypes.compute()
        (r"\.dtypes\.compute\(\)", "dtypes는 이미 pandas입니다. compute() 불필요!"),
        # value_counts(normalize=True)
        (
            r"\.value_counts\(\s*normalize\s*=\s*True",
            "Dask는 value_counts(normalize=True)를 지원하지 않습니다.",
        ),
        # value_counts().unstack()
        (
            r"\.value_counts\([^)]*\)\.unstack\(",
            "Dask Series에는 unstack() 메서드가 없습니다.",
        ),
        # corr() on the whole frame (no column selection right before it).
        # Python's ``re`` only accepts fixed-width look-behind, so the check
        # is "not directly preceded by a closing bracket" rather than a
        # variable-width ``[name]`` match, which would fail to compile.
        (
            r"(?<!\])\.corr\(\)\.compute\(\)",
            "corr()는 숫자형 컬럼만 선택 후 사용하세요: df[numeric_cols].corr().compute()",
        ),
    ]

    # Matplotlib anti-patterns
    MATPLOTLIB_ANTIPATTERNS = [
        # ``ha=`` passed to tick_params. The word boundary keeps keywords
        # that merely end in "ha" (e.g. grid_alpha=) from matching.
        (
            r"tick_params\([^)]*\bha\s*=",
            "tick_params()에 ha 파라미터는 사용 불가. plt.setp(ax.get_xticklabels(), ha='right') 사용하세요.",
        ),
        # ``rotation=`` passed to tick_params. The word boundary keeps
        # ``labelrotation=`` from matching.
        # NOTE(review): verify against the matplotlib docs that a bare
        # ``rotation=`` really is rejected by tick_params.
        (
            r"tick_params\([^)]*\brotation\s*=",
            "tick_params()에 rotation 파라미터는 사용 불가. plt.xticks(rotation=...) 사용하세요.",
        ),
    ]

    # Pandas anti-patterns (common mistakes)
    PANDAS_ANTIPATTERNS = [
        # Assigning the result of a call that uses inplace=True
        (
            r"=\s*\w+\.\w+\([^)]*inplace\s*=\s*True",
            "inplace=True 사용 시 할당하지 마세요. 결과가 None입니다.",
        ),
        # iterrows() - reported at INFO severity only
        (
            r"\.iterrows\(\)",
            "iterrows()는 느립니다. 가능하면 itertuples() 또는 벡터 연산을 사용하세요.",
            IssueSeverity.INFO,
        ),
    ]

    # Polars anti-patterns
    POLARS_ANTIPATTERNS = [
        # pandas-style indexing
        (
            r"\.loc\[",
            "Polars는 .loc 인덱싱을 지원하지 않습니다. filter() 또는 select()를 사용하세요.",
        ),
        (
            r"\.iloc\[",
            "Polars는 .iloc 인덱싱을 지원하지 않습니다. slice() 또는 row()를 사용하세요.",
        ),
    ]

    # Library name -> anti-pattern list
    LIBRARY_PATTERNS = {
        "dask": DASK_ANTIPATTERNS,
        "matplotlib": MATPLOTLIB_ANTIPATTERNS,
        "pandas": PANDAS_ANTIPATTERNS,
        "polars": POLARS_ANTIPATTERNS,
    }

    def check(
        self, code: str, detected_libraries: Optional[List[str]] = None
    ) -> List[ValidationIssue]:
        """Scan ``code`` for the anti-patterns of every library in use.

        Args:
            code: Python source to scan.
            detected_libraries: Library names already known to be in use;
                they are merged with the libraries detected in ``code``.

        Returns:
            One ``ValidationIssue`` (category BEST_PRACTICE) per regex match,
            carrying the 1-based line of the match and the first 50
            characters of the matched text as ``code_snippet``.
        """
        issues = []
        detected_libraries = detected_libraries or []

        # Libraries in use: the caller-supplied ones plus those found in code.
        libraries_in_use = self._detect_libraries_in_code(code, detected_libraries)

        for lib in libraries_in_use:
            for pattern_info in self.LIBRARY_PATTERNS.get(lib, []):
                # Entries are (pattern, message) or (pattern, message, severity).
                if len(pattern_info) == 2:
                    pattern, message = pattern_info
                    severity = IssueSeverity.WARNING
                else:
                    pattern, message, severity = pattern_info

                for match in re.finditer(pattern, code):
                    # Line number = newlines before the match start, plus one.
                    line_num = code[: match.start()].count("\n") + 1

                    issues.append(
                        ValidationIssue(
                            severity=severity,
                            category=IssueCategory.BEST_PRACTICE,
                            message=f"[API 패턴] {message}",
                            line=line_num,
                            code_snippet=match.group(0)[:50],
                        )
                    )

        return issues

    def _detect_libraries_in_code(
        self, code: str, detected_libraries: List[str]
    ) -> List[str]:
        """Return ``detected_libraries`` plus the libraries found in ``code``.

        A library counts as found when any of its import statements or
        conventional alias prefixes (e.g. ``pd.``, ``plt.``) matches,
        case-insensitively. An ``import seaborn`` or ``sns.`` usage is counted
        as matplotlib. The order of the returned list is not defined.
        """
        libraries = set(detected_libraries)

        import_patterns = {
            "dask": [r"import\s+dask", r"from\s+dask", r"\bdd\.", r"\bda\."],
            "matplotlib": [
                r"import\s+matplotlib",
                r"from\s+matplotlib",
                r"\bplt\.",
                r"import\s+seaborn",
                r"\bsns\.",
            ],
            "pandas": [r"import\s+pandas", r"from\s+pandas", r"\bpd\."],
            "polars": [r"import\s+polars", r"from\s+polars", r"\bpl\."],
        }

        for lib, patterns in import_patterns.items():
            if any(re.search(pattern, code, re.IGNORECASE) for pattern in patterns):
                libraries.add(lib)

        return list(libraries)
1227
+
1228
+
1229
# Module-level APIPatternChecker instance, created on first request
_api_pattern_checker_instance: Optional[APIPatternChecker] = None


def get_api_pattern_checker() -> APIPatternChecker:
    """Return the shared APIPatternChecker, creating it on the first call."""
    global _api_pattern_checker_instance
    checker = _api_pattern_checker_instance
    if checker is None:
        checker = _api_pattern_checker_instance = APIPatternChecker()
    return checker