aiauto-client 0.1.4__py3-none-any.whl → 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aiauto/__init__.py CHANGED
@@ -1,5 +1,4 @@
  from .core import AIAutoController, TrialController, CallbackTopNArtifact, StudyWrapper
- from .api import create_study
  from ._config import AIAUTO_API_TARGET
  from .constants import RUNTIME_IMAGES
 
@@ -10,7 +9,6 @@ __all__ = [
  'TrialController',
  'CallbackTopNArtifact',
  'StudyWrapper',
- 'create_study',
  'AIAUTO_API_TARGET',
  'RUNTIME_IMAGES',
  ]
aiauto/constants.py CHANGED
@@ -9,20 +9,15 @@ RUNTIME_IMAGES = [
  "ghcr.io/astral-sh/uv:python3.10-bookworm-slim",
  "ghcr.io/astral-sh/uv:python3.11-bookworm-slim",
  "ghcr.io/astral-sh/uv:python3.12-bookworm-slim",
-
+
  # GPU Images (PyTorch)
  "pytorch/pytorch:2.1.0-cuda11.8-cudnn8-runtime",
  "pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime",
  "pytorch/pytorch:2.4.0-cuda12.4-cudnn9-runtime",
-
+
  # GPU Images (TensorFlow)
  "tensorflow/tensorflow:2.15.0-gpu",
- "tensorflow/tensorflow:2.15.0-gpu-jupyter",
-
- # JAX Images
- "nvcr.io/nvidia/jax:23.10-py3",
- "nvcr.io/nvidia/jax:24.04-py3",
-
+
  # Custom/Legacy images
- "ghcr.io/01ai/zipline:latest",  # Custom zipline trading library
- ]
+ "registry.gitlab.com/01ai/eng/aiauto/aiauto/zipline-prepared:main-v00.00.01-amd64-11ca2c41-250901",
+ ]
aiauto/core.py CHANGED
@@ -3,7 +3,7 @@ import tempfile
  from typing import Union, Optional, List, Dict, Callable
  import optuna
  from .http_client import ConnectRPCClient
- from .serializer import serialize, build_requirements
+ from .serializer import serialize, build_requirements, object_to_json
  from ._config import AIAUTO_API_TARGET
 
 
@@ -25,15 +25,15 @@ class AIAutoController:
          # Call EnsureWorkspace, receive journal_grpc_storage_proxy_host_external, and initialize storage
          try:
              response = self.client.call_rpc("EnsureWorkspace", {})
-
+
              # Initialize storage with the returned journal_grpc_storage_proxy_host_external
              host_external = response.get('journalGrpcStorageProxyHostExternal', '')
              if not host_external:
                  raise RuntimeError("No storage host returned from EnsureWorkspace")
-
+
              host, port = host_external.split(':')
              self.storage = optuna.storages.GrpcStorageProxy(host=host, port=int(port))
-
+
              # Store the internal host for CRD usage (if needed later)
              self.storage_host_internal = response.get('journalGrpcStorageProxyHostInternal', '')
              self.dashboard_url = response.get('dashboardUrl', '')
@@ -62,6 +62,46 @@ class AIAutoController:
      def get_artifact_tmp_dir(self):
          return self.tmp_dir
 
+     def create_study(
+         self,
+         study_name: str,
+         direction: Optional[str] = 'minimize',
+         directions: Optional[List[str]] = None,
+         sampler: Union[object, dict, None] = None,
+         pruner: Union[object, dict, None] = None
+     ) -> 'StudyWrapper':
+         """Create a new study using the controller's token."""
+         if not direction and not directions:
+             raise ValueError("Either 'direction' or 'directions' must be specified")
+ 
+         if direction and directions:
+             raise ValueError("Cannot specify both 'direction' and 'directions'")
+ 
+         try:
+             # Prepare request data for CreateStudy
+             request_data = {
+                 "spec": {
+                     "studyName": study_name,
+                     "direction": direction or "",
+                     "directions": directions or [],
+                     "samplerJson": object_to_json(sampler),
+                     "prunerJson": object_to_json(pruner)
+                 }
+             }
+ 
+             # Call CreateStudy RPC
+             response = self.client.call_rpc("CreateStudy", request_data)
+ 
+             # Return StudyWrapper
+             return StudyWrapper(
+                 study_name=response.get("studyName", study_name),
+                 storage=self.storage,
+                 controller=self
+             )
+ 
+         except Exception as e:
+             raise RuntimeError(f"Failed to create study: {e}") from e
+ 
 
  class TrialController:
      def __init__(self, trial: optuna.trial.Trial):
@@ -144,15 +184,20 @@ class StudyWrapper:
      def optimize(
          self,
          objective: Callable,
-         n_trials: int,
-         parallelism: int,
+         n_trials: int = 10,
+         parallelism: int = 2,
          requirements_file: Optional[str] = None,
          requirements_list: Optional[List[str]] = None,
-         resources_requests: Optional[Dict[str, str]] = None,
-         resources_limits: Optional[Dict[str, str]] = None,
-         runtime_image: Optional[str] = None,
+         resources_requests: Optional[Dict[str, str]] = {"cpu": "256m", "memory": "256Mi"},
+         resources_limits: Optional[Dict[str, str]] = {"cpu": "256m", "memory": "256Mi"},
+         runtime_image: Optional[str] = 'ghcr.io/astral-sh/uv:python3.8-bookworm-slim',
          use_gpu: bool = False
      ) -> None:
+         if runtime_image is None or runtime_image == "":
+             if use_gpu:
+                 runtime_image = "pytorch/pytorch:2.1.0-cuda12.1-cudnn8-runtime"
+             else:
+                 runtime_image = "ghcr.io/astral-sh/uv:python3.8-bookworm-slim"
          try:
              request_data = {
                  "objective": {
@@ -169,7 +214,7 @@
                      "useGpu": use_gpu
                  }
              }
-
+
              self._controller.client.call_rpc("Optimize", request_data)
 
          except Exception as e:
@@ -178,10 +223,10 @@
      def get_status(self) -> dict:
          try:
              response = self._controller.client.call_rpc(
-                 "GetStatus",
+                 "GetStatus",
                  {"studyName": self.study_name}
              )
-
+
              # Convert camelCase to snake_case for backward compatibility
              return {
                  "study_name": response.get("studyName", ""),
@@ -200,4 +245,4 @@
              raise RuntimeError(f"Failed to get status: {e}") from e
 
      def __repr__(self) -> str:
-         return f"StudyWrapper(study_name='{self.study_name}', storage={self._storage})"
+         return f"StudyWrapper(study_name='{self.study_name}', storage={self._storage})"
aiauto_client-0.1.6.dist-info/METADATA ADDED
@@ -0,0 +1,439 @@
+ Metadata-Version: 2.1
+ Name: aiauto-client
+ Version: 0.1.6
+ Summary: AI Auto HPO (Hyperparameter Optimization) Client Library
+ Author-email: AIAuto Team <ainode@zeroone.ai>
+ Project-URL: Homepage, https://dashboard.aiauto.pangyo.ainode.ai
+ Project-URL: Repository, https://dashboard.aiauto.pangyo.ainode.ai
+ Project-URL: Documentation, https://dashboard.aiauto.pangyo.ainode.ai
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Programming Language :: Python :: 3.13
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ Requires-Dist: optuna>=3.0.0
+ Requires-Dist: requests>=2.25.0
+ Requires-Dist: grpcio>=1.48.0
+ Requires-Dist: grpcio-status>=1.48.0
+ 
+ # AIAuto - Hyperparameter Optimization Client Library
+ 
+ AIAuto is a client library for a Kubernetes-based distributed HPO (Hyperparameter Optimization) system.
+ It handles the Connect RPC (HTTP/1.1) communication between the user's Python library and the Next.js server.
+ 
+ ## Installation
+ - `pip install aiauto-client optuna`
+ 
+ ## API Reference
+ 
+ ### create_study parameters
+ - `study_name` (str): Name of the study
+ - `direction` (str): Direction for single-objective optimization ("minimize" or "maximize")
+ - `directions` (List[str]): List of directions for multi-objective optimization (mutually exclusive with `direction`)
+ - `sampler` (object/dict): Optuna sampler object or dict (optional)
+ - `pruner` (object/dict): Optuna pruner object or dict (optional)
+ 
+ **Note**: Specify only one of `direction` and `directions`.
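+ 
+ For example, a minimal sketch (`ac` is an `AIAutoController` created beforehand; the sampler and pruner shown are plain Optuna objects, which are serialized and sent to the server):
+ 
+ ```python
+ import optuna
+ import aiauto
+ 
+ ac = aiauto.AIAutoController('<token>')
+ 
+ # Single-objective study with an explicit Optuna sampler and pruner
+ study_wrapper = ac.create_study(
+     study_name='sketch',
+     direction='minimize',
+     sampler=optuna.samplers.TPESampler(),
+     pruner=optuna.pruners.MedianPruner(),
+ )
+ ```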
+ 
+ ### optimize parameters
+ - `objective` (Callable): Objective function that takes a Trial as its argument
+ - `n_trials` (int): Total number of trials
+ - `parallelism` (int): Number of Pods running concurrently (default: 2)
+ - `requirements_file` (str): Path to a requirements.txt file (mutually exclusive with `requirements_list`)
+ - `requirements_list` (List[str]): List of packages (mutually exclusive with `requirements_file`)
+ - `resources_requests` (Dict[str, str]): K8s resource requests (default: {"cpu": "256m", "memory": "256Mi"})
+ - `resources_limits` (Dict[str, str]): K8s resource limits (default: {"cpu": "256m", "memory": "256Mi"})
+ - `runtime_image` (str): Custom runtime image (selected automatically when None)
+ - `use_gpu` (bool): Whether to use a GPU (default: False)
+ 
+ **Note**: Specify only one of `requirements_file` and `requirements_list`.
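+ 
+ For example, a minimal sketch (assumes a `study_wrapper` returned by `create_study` and an `objective` function as in the Quick Start below; the file path and resource values are illustrative):
+ 
+ ```python
+ # Install dependencies from a requirements.txt instead of an inline list,
+ # and override the default Pod resources.
+ study_wrapper.optimize(
+     objective,
+     n_trials=20,
+     parallelism=4,
+     requirements_file='./requirements.txt',
+     resources_requests={"cpu": "500m", "memory": "1Gi"},
+     resources_limits={"cpu": "1", "memory": "2Gi"},
+ )
+ ```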
+ 
+ ## Checking the supported runtime images
+ ```python
+ import aiauto
+ 
+ # List the available images
+ for image in aiauto.RUNTIME_IMAGES:
+     print(image)
+ ```
+ 
+ ## Execution flow
+ ### Issuing a token # TODO
+ - Go to `https://dashboard.aiauto.pangyo.ainode.ai` and log in to ainode,
+ - then go to `https://dashboard.aiauto.pangyo.ainode.ai/token` and issue an aiauto token.
+ - Initialize the AIAutoController singleton with the issued token as below; this activates the OptunaWorkspace.
+ ```python
+ import aiauto
+ 
+ ac = aiauto.AIAutoController('<token>')
+ ```
+ - The created OptunaWorkspace and its optuna-dashboard link can be checked at `https://dashboard.aiauto.pangyo.ainode.ai/workspace`.
+ - Creating a study as below makes it visible at `https://dashboard.aiauto.pangyo.ainode.ai/study`, as well as in the optuna-dashboard link.
+ ```python
+ study_wrapper = ac.create_study(
+     study_name='test',
+     direction='maximize',  # or 'minimize'
+ )
+ ```
+ - Write an objective function and pass it to the created study's optimize() as below; the resulting trial batches are visible at `https://dashboard.aiauto.pangyo.ainode.ai/trialbatch`, as well as in the optuna-dashboard link.
+ ```python
+ study_wrapper.optimize(
+     objective=func_with_parameter_trial,
+     n_trials=4,
+     parallelism=2,
+     use_gpu=False,
+     runtime_image=aiauto.RUNTIME_IMAGES[0],
+ )
+ ```
+ 
+ ## Quick Start
+ 
+ ### 1. Simple example (optimizing a mathematical function)
+ 
+ ```python
+ import optuna
+ import aiauto
+ 
+ 
+ # Log in to ainode at `https://dashboard.aiauto.pangyo.ainode.ai` and issue an aiauto token
+ # Initialize the AIAutoController singleton to activate the OptunaWorkspace (the token is set only once)
+ ac = aiauto.AIAutoController('<token>')
+ # The created OptunaWorkspace and optuna-dashboard link are visible at `https://dashboard.aiauto.pangyo.ainode.ai/workspace`
+ 
+ # Create a StudyWrapper
+ study_wrapper = ac.create_study(
+     study_name="simple_optimization",
+     direction="minimize"
+     # sampler=optuna.samplers.TPESampler(),  # any sampler shipped with optuna can be used as-is, see https://optuna.readthedocs.io/en/stable/reference/samplers/index.html
+ )
+ # The created study is visible at `https://dashboard.aiauto.pangyo.ainode.ai/study`
+ 
+ # Define the objective function
+ def objective(trial: optuna.trial.Trial):
+     """This function runs on the server, not on the user's local machine"""
+     x = trial.suggest_float('x', -10, 10)
+     y = trial.suggest_float('y', -10, 10)
+     return (x - 2) ** 2 + (y - 3) ** 2
+ 
+ # Train the user model / run the optimization (executed in parallel on the server)
+ study_wrapper.optimize(
+     objective,
+     n_trials=100,
+     parallelism=4  # number of Pods running concurrently
+ )
+ # Results are visible in the optuna-dashboard link at `https://dashboard.aiauto.pangyo.ainode.ai/workspace`
+ ```
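+ 
+ Since the trials run asynchronously in server-side Pods, progress can be polled with `get_status()`. A minimal sketch (the `count_active` / `count_completed` keys follow the 0.1.4 README and are assumed unchanged in this version):
+ 
+ ```python
+ # Poll the study status from the server
+ status = study_wrapper.get_status()
+ print(f"Active: {status['count_active']}, Completed: {status['count_completed']}")
+ ```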
+ 
+ ### 2. PyTorch model optimization (Single Objective)
+ 
+ ```python
+ import optuna
+ import aiauto
+ 
+ 
+ # Log in to ainode at `https://dashboard.aiauto.pangyo.ainode.ai` and issue an aiauto token
+ # Initialize the AIAutoController singleton to activate the OptunaWorkspace (the token is set only once)
+ ac = aiauto.AIAutoController('<token>')
+ # The created OptunaWorkspace and optuna-dashboard link are visible at `https://dashboard.aiauto.pangyo.ainode.ai/workspace`
+ 
+ # Create a StudyWrapper
+ study_wrapper = ac.create_study(
+     study_name="pytorch_optimization",
+     direction="minimize",
+     # sampler=optuna.samplers.TPESampler(),  # any sampler shipped with optuna can be used as-is, see https://optuna.readthedocs.io/en/stable/reference/samplers/index.html
+     pruner=optuna.pruners.PatientPruner(  # any pruner shipped with optuna can be used as-is, see https://optuna.readthedocs.io/en/stable/reference/pruners.html
+         optuna.pruners.MedianPruner(),
+         patience=4,
+     ),
+ )
+ # The created study is visible at `https://dashboard.aiauto.pangyo.ainode.ai/study`
+ 
+ # Define the objective function
+ # See https://docs.pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
+ def objective(trial: optuna.trial.Trial):
+     """
+     This function runs on the server, not on the user's local machine.
+     All imports must live inside the function.
+     """
+     import torch
+     from torch import nn, optim
+     from torch.utils.data import DataLoader, random_split, Subset
+     from torchvision import transforms, datasets
+     import torch.nn.functional as F
+ 
+     # Sample hyperparameters
+     lr = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
+     momentum = trial.suggest_float('momentum', 0.1, 0.99)
+     batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
+     epochs = trial.suggest_int('epochs', 10, 100, step=10)
+ 
+     # Define the model
+     class Net(nn.Module):
+         def __init__(self):
+             super().__init__()
+             self.conv1 = nn.Conv2d(3, 6, 5)
+             self.pool = nn.MaxPool2d(2, 2)
+             self.conv2 = nn.Conv2d(6, 16, 5)
+             self.fc1 = nn.Linear(16 * 5 * 5, 120)
+             self.fc2 = nn.Linear(120, 84)
+             self.fc3 = nn.Linear(84, 10)
+ 
+         def forward(self, x):
+             x = self.pool(F.relu(self.conv1(x)))
+             x = self.pool(F.relu(self.conv2(x)))
+             x = torch.flatten(x, 1)  # flatten all dimensions except batch
+             x = F.relu(self.fc1(x))
+             x = F.relu(self.fc2(x))
+             x = self.fc3(x)
+             return x
+ 
+     # Build the model and training setup (GPU is used automatically when available)
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     model = Net().to(device)
+     criterion = nn.CrossEntropyLoss()
+     optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
+ 
+     # Load the data
+     train_set = datasets.CIFAR10(
+         root="/tmp/cifar10_data",  # use the Pod's temporary directory
+         train=True,
+         download=True,
+         transform=transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+         ]),
+     )
+     train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
+ 
+     test_set = datasets.CIFAR10(
+         root="/tmp/cifar10_data",  # use the Pod's temporary directory
+         train=False,
+         download=True,
+         transform=transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+         ]),
+     )
+     test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True, num_workers=2)
+ 
+     # Train
+     min_epochs_for_pruning = max(50, epochs // 5)  # prune only after at least 50 epochs, or a fifth of the total
+     total_loss = 0.0
+     for epoch in range(epochs):  # loop over the dataset multiple times
+         running_loss = 0.0
+         model.train()
+         for i, (inputs, targets) in enumerate(train_loader, 0):
+             inputs, targets = inputs.to(device), targets.to(device)
+             # zero the parameter gradients
+             optimizer.zero_grad()
+             # forward + backward + optimize
+             outputs = model(inputs)
+             loss = criterion(outputs, targets)
+             loss.backward()
+             optimizer.step()
+ 
+             # print statistics
+             running_loss += loss.item()
+             if i % 2000 == 1999:  # print every 2000 mini-batches
+                 print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
+ 
+         # Report the intermediate result and check for early stopping; prune only after the minimum number of epochs
+         trial.report(running_loss, epoch)
+         total_loss += running_loss
+         if epoch >= min_epochs_for_pruning and trial.should_prune():
+             raise optuna.TrialPruned()
+ 
+     return total_loss
+ 
+ # Run on GPU Pods
+ study_wrapper.optimize(
+     objective,
+     n_trials=100,
+     parallelism=4,
+     use_gpu=True,  # use GPU
+     requirements_list=['torch', 'torchvision']  # installed automatically in the Pod
+ )
+ ```
+ 
+ ### 3. Multi-objective optimization (Loss + FLOPS)
+ 
+ ```python
+ import optuna
+ import aiauto
+ 
+ 
+ # Log in to ainode at `https://dashboard.aiauto.pangyo.ainode.ai` and issue an aiauto token
+ # Initialize the AIAutoController singleton to activate the OptunaWorkspace (the token is set only once)
+ ac = aiauto.AIAutoController('<token>')
+ # The created OptunaWorkspace and optuna-dashboard link are visible at `https://dashboard.aiauto.pangyo.ainode.ai/workspace`
+ 
+ # Create a StudyWrapper
+ study_wrapper = ac.create_study(
+     study_name="pytorch_multiple_optimization",
+     directions=["minimize", "minimize"],  # minimize loss, minimize FLOPS
+     # sampler=optuna.samplers.TPESampler(),  # any sampler shipped with optuna can be used as-is, see https://optuna.readthedocs.io/en/stable/reference/samplers/index.html
+ )
+ # The created study is visible at `https://dashboard.aiauto.pangyo.ainode.ai/study`
+ 
+ # Define the objective function
+ # See https://docs.pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
+ def objective(trial: optuna.trial.Trial):
+     """
+     This function runs on the server, not on the user's local machine.
+     All imports must live inside the function.
+     """
+     import torch
+     from torch import nn, optim
+     from torch.utils.data import DataLoader, random_split, Subset
+     from torchvision import transforms, datasets
+     import torch.nn.functional as F
+     from fvcore.nn import FlopCountAnalysis
+ 
+     # Sample hyperparameters
+     lr = trial.suggest_float('learning_rate', 1e-5, 1e-1, log=True)
+     momentum = trial.suggest_float('momentum', 0.1, 0.99)
+     batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128])
+     epochs = trial.suggest_int('epochs', 10, 100, step=10)
+ 
+     # Define the model
+     class Net(nn.Module):
+         def __init__(self):
+             super().__init__()
+             self.conv1 = nn.Conv2d(3, 6, 5)
+             self.pool = nn.MaxPool2d(2, 2)
+             self.conv2 = nn.Conv2d(6, 16, 5)
+             self.fc1 = nn.Linear(16 * 5 * 5, 120)
+             self.fc2 = nn.Linear(120, 84)
+             self.fc3 = nn.Linear(84, 10)
+ 
+         def forward(self, x):
+             x = self.pool(F.relu(self.conv1(x)))
+             x = self.pool(F.relu(self.conv2(x)))
+             x = torch.flatten(x, 1)  # flatten all dimensions except batch
+             x = F.relu(self.fc1(x))
+             x = F.relu(self.fc2(x))
+             x = self.fc3(x)
+             return x
+ 
+     # Build the model and training setup (GPU is used automatically when available)
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+     model = Net().to(device)
+     criterion = nn.CrossEntropyLoss()
+     optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
+ 
+     # Load the data
+     train_set = datasets.CIFAR10(
+         root="/tmp/cifar10_data",  # use the Pod's temporary directory
+         train=True,
+         download=True,
+         transform=transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+         ]),
+     )
+     train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=2)
+ 
+     test_set = datasets.CIFAR10(
+         root="/tmp/cifar10_data",  # use the Pod's temporary directory
+         train=False,
+         download=True,
+         transform=transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
+         ]),
+     )
+     test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=True, num_workers=2)
+ 
+     # Train
+     total_loss = 0.0
+     # pruning is not supported for multi-objective studies
+     for epoch in range(epochs):  # loop over the dataset multiple times
+         running_loss = 0.0
+         model.train()
+         for i, (inputs, targets) in enumerate(train_loader, 0):
+             inputs, targets = inputs.to(device), targets.to(device)
+             # zero the parameter gradients
+             optimizer.zero_grad()
+             # forward + backward + optimize
+             outputs = model(inputs)
+             loss = criterion(outputs, targets)
+             loss.backward()
+             optimizer.step()
+ 
+             # print statistics
+             running_loss += loss.item()
+             if i % 2000 == 1999:  # print every 2000 mini-batches
+                 print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
+ 
+         # pruning is not supported for multi-objective studies
+         total_loss += running_loss
+ 
+     # Compute FLOPS
+     dummy_input = torch.randn(1, 3, 32, 32).to(device)
+     flops = FlopCountAnalysis(model, (dummy_input,)).total()
+ 
+     return total_loss, flops
+ 
+ # Run on GPU Pods
+ study_wrapper.optimize(
+     objective,
+     n_trials=100,
+     parallelism=4,
+     use_gpu=True,  # use GPU
+     requirements_list=['torch', 'torchvision', 'fvcore']  # installed automatically in the Pod
+ )
+ ```
+ 
+ ### 4. Ask/Tell pattern and the underlying Optuna Study
+ 
+ ```python
+ import optuna
+ import aiauto
+ 
+ # Log in to ainode at `https://dashboard.aiauto.pangyo.ainode.ai` and issue an aiauto token
+ # Initialize the AIAutoController singleton to activate the OptunaWorkspace (the token is set only once)
+ ac = aiauto.AIAutoController('<token>')
+ # The created OptunaWorkspace and optuna-dashboard link are visible at `https://dashboard.aiauto.pangyo.ainode.ai/workspace`
+ 
+ # Create a study
+ study_wrapper = ac.create_study(
+     study_name="test",
+     direction='minimize',
+     # sampler=optuna.samplers.TPESampler(),  # any sampler shipped with optuna can be used as-is, see https://optuna.readthedocs.io/en/stable/reference/samplers/index.html
+     # pruner=optuna.pruners.PatientPruner(  # any pruner shipped with optuna can be used as-is, see https://optuna.readthedocs.io/en/stable/reference/pruners.html
+     #     optuna.pruners.MedianPruner(),
+     #     patience=4,
+     # )
+ )
+ # The created study is visible at `https://dashboard.aiauto.pangyo.ainode.ai/study`
+ 
+ # Obtain the actual optuna.Study object (ask/tell can be done locally)
+ study = study_wrapper.get_study()
+ 
+ # Optimize with the ask/tell pattern
+ trial = study.ask()
+ 
+ # Sample the parameters
+ x = trial.suggest_float('x', -10, 10)
+ y = trial.suggest_float('y', -10, 10)
+ 
+ # Train the user model / evaluate the objective (this part runs locally)
+ ret = (x - 2) ** 2 + (y - 3) ** 2
+ 
+ # Report the result
+ study.tell(trial, ret)
+ # Results are visible in the optuna-dashboard link at `https://dashboard.aiauto.pangyo.ainode.ai/workspace`
+ ```
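+ 
+ Because `get_study()` returns a plain `optuna.Study` backed by the workspace storage, the usual Optuna accessors also work locally once trials have completed. A minimal sketch using the standard Optuna API:
+ 
+ ```python
+ # Inspect the results accumulated in the shared storage
+ print(study.best_params)  # best hyperparameters found so far
+ print(study.best_value)   # best objective value (single-objective studies)
+ ```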
+ 
+ # lib build
+ ```bash
+ make build push
+ ```
aiauto_client-0.1.6.dist-info/RECORD ADDED
@@ -0,0 +1,10 @@
+ aiauto/__init__.py,sha256=sF7sJaXg7-MqolSYLxsaXAir1dBzARhXLrHo7zLsupg,345
+ aiauto/_config.py,sha256=DaRTIZlph9T3iuW-Cy4fkw8i3bXB--gMtW947SLZZNs,159
+ aiauto/constants.py,sha256=rBibGOQHHrdkwaai92-3I8-N0cu-B4CoCoQbG9-Cl8k,821
+ aiauto/core.py,sha256=DQW9uvVNqP9J9s1IFx969upw10NQLHJMNsudHnauk6A,9840
+ aiauto/http_client.py,sha256=t1gxeM5-d5bsVoFWgaNcTrt_WWUXuMuxge9gDlEqhoA,2086
+ aiauto/serializer.py,sha256=_iPtEoqW8RTKOZ6UrC7CzOqoangpPYzeL7MQfIdmov8,1568
+ aiauto_client-0.1.6.dist-info/METADATA,sha256=FOC7h1MkE6dlPa7urKnWX2sIUqe3Ac40TJNC1JgFzu4,17487
+ aiauto_client-0.1.6.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+ aiauto_client-0.1.6.dist-info/top_level.txt,sha256=Sk2ctO9_Bf_tAPwq1x6Vfl6OuL29XzwMTO4F_KG6oJE,7
+ aiauto_client-0.1.6.dist-info/RECORD,,
aiauto/api.py DELETED
@@ -1,46 +0,0 @@
- from typing import Optional, List, Union
- from .serializer import object_to_json
- from .core import StudyWrapper, AIAutoController
- 
- 
- def create_study(
-     study_name: str,
-     token: str,
-     direction: Optional[str] = None,
-     directions: Optional[List[str]] = None,
-     sampler: Union[object, dict, None] = None,
-     pruner: Union[object, dict, None] = None
- ) -> StudyWrapper:
-     if not direction and not directions:
-         raise ValueError("Either 'direction' or 'directions' must be specified")
- 
-     if direction and directions:
-         raise ValueError("Cannot specify both 'direction' and 'directions'")
- 
-     try:
-         # Initialize controller (which ensures workspace)
-         controller = AIAutoController(token)
- 
-         # Prepare request data for CreateStudy
-         request_data = {
-             "spec": {
-                 "studyName": study_name,
-                 "direction": direction or "",
-                 "directions": directions or [],
-                 "samplerJson": object_to_json(sampler),
-                 "prunerJson": object_to_json(pruner)
-             }
-         }
- 
-         # Call CreateStudy RPC
-         response = controller.client.call_rpc("CreateStudy", request_data)
- 
-         # Return StudyWrapper
-         return StudyWrapper(
-             study_name=response.get("studyName", study_name),
-             storage=controller.storage,
-             controller=controller
-         )
- 
-     except Exception as e:
-         raise RuntimeError(f"Failed to create study: {e}") from e
aiauto_client-0.1.4.dist-info/METADATA DELETED
@@ -1,102 +0,0 @@
- Metadata-Version: 2.1
- Name: aiauto-client
- Version: 0.1.4
- Summary: AI Auto HPO (Hyperparameter Optimization) Client Library
- Author-email: AIAuto Team <ainode@zeroone.ai>
- Project-URL: Homepage, https://aiauto.cloude.ainode.ai
- Project-URL: Repository, https://aiauto.cloude.ainode.ai
- Project-URL: Documentation, https://aiauto.cloude.ainode.ai
- Classifier: Development Status :: 3 - Alpha
- Classifier: Intended Audience :: Developers
- Classifier: Intended Audience :: Science/Research
- Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.8
- Classifier: Programming Language :: Python :: 3.9
- Classifier: Programming Language :: Python :: 3.10
- Classifier: Programming Language :: Python :: 3.11
- Classifier: Programming Language :: Python :: 3.12
- Classifier: Programming Language :: Python :: 3.13
- Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
- Classifier: Topic :: Software Development :: Libraries :: Python Modules
- Requires-Python: >=3.8
- Description-Content-Type: text/markdown
- Requires-Dist: optuna>=3.0.0
- Requires-Dist: requests>=2.25.0
- Requires-Dist: grpcio>=1.48.0
- Requires-Dist: grpcio-status>=1.48.0
- 
- # AIAuto - Hyperparameter Optimization Client Library
- 
- AIAuto is a client library for a Kubernetes-based distributed HPO (Hyperparameter Optimization) system.
- It handles the gRPC communication between the user's Python library and the Next.js server.
- 
- ## lib build
- - `make build push`
- 
- ## Installation
- - `uv add aiauto-client`
- 
- ## Quick Start
- 
- ### Creating a study and the Ask/Tell pattern
- ```python
- import aiauto
- 
- # Create a StudyWrapper (JWT token required)
- studyWrapper = aiauto.create_study(
-     study_name='my_optimization',
-     token='your_jwt_token',
-     direction='maximize'
- )
- 
- # Obtain the actual optuna.Study object (ask/tell can be done locally)
- study = studyWrapper.get_study()
- 
- # Optimize with the ask/tell pattern
- trial = study.ask()
- params = trial.params
- 
- # Train the user model
- accuracy = train_model(params)
- 
- # Report the result
- study.tell(trial, accuracy)
- ```
- 
- ### Distributed optimization (Pod execution)
- ```python
- import aiauto
- 
- def objective(trial):
-     tc = aiauto.TrialController(trial)
- 
-     # Sample hyperparameters
-     lr = trial.suggest_float('lr', 1e-5, 1e-1, log=True)
-     batch_size = trial.suggest_int('batch_size', 16, 128)
- 
-     # Model training logic
-     accuracy = train_model(lr, batch_size)
- 
-     tc.log(f'lr: {lr}, batch_size: {batch_size}, accuracy: {accuracy}')
- 
-     return accuracy
- 
- # Create a StudyWrapper
- studyWrapper = aiauto.create_study(
-     study_name='distributed_optimization',
-     token='your_jwt_token',
-     direction='maximize'
- )
- 
- # Run distributed optimization (executed in Kubernetes Pods)
- studyWrapper.optimize(
-     objective=objective,
-     n_trials=100,
-     parallelism=4,
-     requirements_list=['torch==2.0.0', 'torchvision==0.15.0']
- )
- 
- # Monitor status in real time
- status = studyWrapper.get_status()
- print(f"Active: {status['count_active']}, Completed: {status['count_completed']}")
- ```
aiauto_client-0.1.4.dist-info/RECORD DELETED
@@ -1,11 +0,0 @@
- aiauto/__init__.py,sha256=TgD2ZvIHb7oKJb-HjUl3WfXXtuWLien0sybSy9onjL8,395
- aiauto/_config.py,sha256=DaRTIZlph9T3iuW-Cy4fkw8i3bXB--gMtW947SLZZNs,159
- aiauto/api.py,sha256=hzoVZMwKtH2EaAM1bY67grp6cenltCl8kdG8YHPegvk,1517
- aiauto/constants.py,sha256=UhDCLFoPE89XrHB3SEnZR3YUuzajgugMGX80KYx_qc0,939
- aiauto/core.py,sha256=dF-J8jxRP_FkQtxYYsrgza8OQHNzoPLN1cjYeUaqX1s,8076
- aiauto/http_client.py,sha256=t1gxeM5-d5bsVoFWgaNcTrt_WWUXuMuxge9gDlEqhoA,2086
- aiauto/serializer.py,sha256=_iPtEoqW8RTKOZ6UrC7CzOqoangpPYzeL7MQfIdmov8,1568
- aiauto_client-0.1.4.dist-info/METADATA,sha256=g86ft1ph8ebOUxQRafbgsYvxdlHRygOxvuS3vpSbfgQ,3001
- aiauto_client-0.1.4.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
- aiauto_client-0.1.4.dist-info/top_level.txt,sha256=Sk2ctO9_Bf_tAPwq1x6Vfl6OuL29XzwMTO4F_KG6oJE,7
- aiauto_client-0.1.4.dist-info/RECORD,,