@unlimiting/unlimitgen 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/publish.yml +48 -0
- package/README.md +129 -0
- package/dist/cli.js +123 -0
- package/dist/core/catalog.js +128 -0
- package/dist/core/generate.js +28 -0
- package/dist/core/types.js +1 -0
- package/dist/providers/gemini.js +124 -0
- package/dist/providers/openaiLike.js +98 -0
- package/dist/utils/auth.js +73 -0
- package/dist/utils/io.js +40 -0
- package/dist/utils/options.js +39 -0
- package/dist/utils/parts.js +33 -0
- package/eslint.config.js +21 -0
- package/package.json +43 -0
- package/skills/ugen/SKILL.md +163 -0
- package/src/cli.ts +146 -0
- package/src/core/catalog.ts +141 -0
- package/src/core/generate.ts +31 -0
- package/src/core/types.ts +31 -0
- package/src/providers/gemini.ts +142 -0
- package/src/providers/openaiLike.ts +122 -0
- package/src/utils/auth.ts +87 -0
- package/src/utils/io.ts +40 -0
- package/src/utils/options.ts +35 -0
- package/src/utils/parts.ts +42 -0
- package/tsconfig.json +16 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# Publishes the package to npm through OIDC trusted publishing.
# Runs when a GitHub release is published, or manually for a given tag.
name: Publish to npm

on:
  release:
    types: [published]
  workflow_dispatch:
    inputs:
      tag:
        description: '릴리즈 태그 (예: v0.2.0)'
        required: false
        type: string

permissions:
  contents: read
  # Needed so the job can request the OIDC token used for trusted publishing/provenance.
  id-token: write

jobs:
  publish:
    runs-on: ubuntu-latest
    environment: npm

    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Honour the manually supplied tag; otherwise build the ref that triggered the run.
          ref: ${{ inputs.tag || github.ref }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: 22
          registry-url: https://registry.npmjs.org

      # Node 22 bundles npm 10.x, but trusted publishing requires npm >= 11.5.1.
      - name: Upgrade npm
        run: npm install -g npm@latest

      - name: Install dependencies
        run: npm ci

      - name: Build
        run: npm run build

      - name: Lint
        run: npm run lint

      # Guard against publishing from a fork/rename under an unexpected package name.
      - name: Verify package name
        run: |
          PKG_NAME=$(node -p "require('./package.json').name")
          echo "package: $PKG_NAME"
          test "$PKG_NAME" = "@unlimiting/unlimitgen"

      - name: Publish (Trusted Publishing + Provenance)
        run: npm publish --access public --provenance
|
package/README.md
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
**ugen**
|
|
2
|
+
|
|
3
|
+
Gemini / OpenAI / Grok(xAI) 모델로 이미지/동영상을 생성하는 CLI입니다.
|
|
4
|
+
|
|
5
|
+
- 이미지
|
|
6
|
+
- Gemini: `gemini-3-pro-image-preview`, `gemini-2.5-flash-image-preview`, `imagen-4.0-generate-001`, `imagen-4.0-ultra-generate-001`, `imagen-4.0-fast-generate-001`
|
|
7
|
+
- OpenAI: `gpt-image-1.5`, `gpt-image-1`, `gpt-image-1-mini`
|
|
8
|
+
- Grok: `grok-imagine-image`, `grok-imagine-image-pro`
|
|
9
|
+
- 동영상
|
|
10
|
+
- Gemini: `veo-3.1-generate-preview`, `veo-3.1-fast-generate-preview`
|
|
11
|
+
- OpenAI: `sora-2`, `sora-2-pro`
|
|
12
|
+
- Grok: `grok-imagine-video`
|
|
13
|
+
|
|
14
|
+
> 참고: xAI(Grok)는 공식 문서의 OpenAI 호환 SDK 경로를 사용합니다.
|
|
15
|
+
|
|
16
|
+
## 설치
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
npm install
|
|
20
|
+
npm run build
|
|
21
|
+
npm link
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
그 뒤 `ugen` 명령을 사용할 수 있습니다.
|
|
25
|
+
|
|
26
|
+
## 인증(비밀번호 입력 + 저장)
|
|
27
|
+
|
|
28
|
+
토큰을 저장하려면 아래 명령을 사용하세요.
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
ugen auth --provider gemini
|
|
32
|
+
ugen auth --provider openai
|
|
33
|
+
ugen auth --provider grok
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
토큰 저장 후에는 생성 명령에서 자동으로 읽습니다.
|
|
37
|
+
우선순위는 `환경변수 > 저장 토큰 > 즉시 비밀번호 입력` 입니다.
|
|
38
|
+
|
|
39
|
+
- Gemini: `GEMINI_API_KEY` 또는 `GOOGLE_API_KEY`
|
|
40
|
+
- OpenAI: `OPENAI_API_KEY`
|
|
41
|
+
- Grok(xAI): `XAI_API_KEY`
|
|
42
|
+
|
|
43
|
+
## 핵심 사용법
|
|
44
|
+
|
|
45
|
+
모델 목록과 모델별 옵션 키 보기:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
ugen models
|
|
49
|
+
ugen models --provider gemini --modality video
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
이미지 생성(입력 순서 보장):
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
ugen generate image \
|
|
56
|
+
--provider openai \
|
|
57
|
+
--model gpt-image-1.5 \
|
|
58
|
+
--part text:"고양이 우주비행사" text:"필름 카메라 스타일" \
|
|
59
|
+
--option size=1024x1024 quality=high
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
이미지 + 텍스트 혼합 입력(여러 개, 순서 보장):
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
ugen generate image \
|
|
66
|
+
--provider gemini \
|
|
67
|
+
--model gemini-2.5-flash-image-preview \
|
|
68
|
+
--part text:"첫 이미지의 구도를 유지" image:./ref1.png text:"두 번째 이미지 색감을 반영" image:./ref2.jpg
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
동영상 생성:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
ugen generate video \
|
|
75
|
+
--provider openai \
|
|
76
|
+
--model sora-2 \
|
|
77
|
+
--part text:"네온 도시를 달리는 고양이" image:./first-frame.png \
|
|
78
|
+
--option seconds=8 size=1280x720
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
고급 옵션(JSON) 병합:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
ugen generate video \
|
|
85
|
+
--provider gemini \
|
|
86
|
+
--model veo-3.1-generate-preview \
|
|
87
|
+
--part text:"바닷가 일출 타임랩스" \
|
|
88
|
+
--options-json '{"numberOfVideos":1,"durationSeconds":8,"aspectRatio":"16:9"}'
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## 옵션 구조
|
|
92
|
+
|
|
93
|
+
- `--part <type:value...>`
|
|
94
|
+
- 반복 가능
|
|
95
|
+
- `text:...`, `image:/path/to/file` 지원
|
|
96
|
+
- 입력 순서는 그대로 모델 요청에 반영
|
|
97
|
+
- `--option <key=value...>`
|
|
98
|
+
- 반복 가능
|
|
99
|
+
- 숫자/불리언/null 자동 파싱
|
|
100
|
+
- `--options-json <json>`
|
|
101
|
+
- `--option` 위에 merge
|
|
102
|
+
- 비디오 전용
|
|
103
|
+
- `--poll-interval-ms` (기본 `5000`)
|
|
104
|
+
- `--timeout-ms` (기본 `900000`)
|
|
105
|
+
|
|
106
|
+
## 구현 메모
|
|
107
|
+
|
|
108
|
+
- Gemini SDK: `@google/genai`
|
|
109
|
+
- OpenAI SDK: `openai`
|
|
110
|
+
- xAI(Grok): OpenAI 호환 SDK(base URL `https://api.x.ai/v1`)
|
|
111
|
+
- 출력물: 기본 `./outputs`
|
|
112
|
+
|
|
113
|
+
## npm 배포(Trusted Publish)
|
|
114
|
+
|
|
115
|
+
- 워크플로우: `.github/workflows/publish.yml`
|
|
116
|
+
|
|
117
|
+
Release(Published) 생성 시 OIDC 기반 trusted publish로 배포됩니다.
|
|
118
|
+
|
|
119
|
+
## ugen 스킬 문서
|
|
120
|
+
|
|
121
|
+
- 경로: `skills/ugen/SKILL.md`
|
|
122
|
+
- 설치/인증/사용 흐름/트러블슈팅을 사용자 관점에서 정리한 가이드입니다.
|
|
123
|
+
|
|
124
|
+
## 옵션 정보 출처(공식 문서)
|
|
125
|
+
|
|
126
|
+
- Google GenAI SDK (`generateImages`, `generateVideos`, `generateContent`): https://www.npmjs.com/package/@google/genai
|
|
127
|
+
- OpenAI TypeScript SDK (`images.generate/edit`, `videos.create/retrieve/downloadContent`): https://github.com/openai/openai-node
|
|
128
|
+
- OpenAI Sora 모델/비디오 가이드: https://platform.openai.com/docs/models/sora-2
|
|
129
|
+
- xAI API docs (OpenAI 호환 사용): https://docs.x.ai/docs/overview
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { Command } from 'commander';
|
|
3
|
+
import { MODEL_CATALOG } from './core/catalog.js';
|
|
4
|
+
import { generateImage, generateVideo } from './core/generate.js';
|
|
5
|
+
import { authAndStoreToken, getTokenFilePath, resolveApiKey } from './utils/auth.js';
|
|
6
|
+
import { mergeOptions, parseOptionPairs } from './utils/options.js';
|
|
7
|
+
import { parseParts } from './utils/parts.js';
|
|
8
|
+
const program = new Command();

// Root command metadata.
program
    .name('ugen')
    .description('Gemini/OpenAI/Grok 이미지/비디오 생성 CLI')
    .version('0.1.0');

// `ugen models`: list catalog entries, optionally filtered by provider and/or modality.
program
    .command('models')
    .description('지원 모델과 옵션 확인')
    .option('-p, --provider <provider>', 'provider 필터 (gemini|openai|grok)')
    .option('-m, --modality <modality>', 'modality 필터 (image|video)')
    .action((opts) => {
        const items = MODEL_CATALOG.filter((item) => (!opts.provider || item.provider === opts.provider) && (!opts.modality || item.modality === opts.modality));
        if (items.length === 0) {
            console.log('조건에 맞는 모델이 없습니다.');
            return;
        }
        for (const item of items) {
            console.log(`- [${item.provider}/${item.modality}] ${item.model}`);
            console.log(` 입력지원: text=${item.supportsText}, image=${item.supportsImageInput}`);
            console.log(` 옵션: ${item.options.join(', ')}`);
            if (item.notes) {
                console.log(` 비고: ${item.notes}`);
            }
        }
    });

// Parent command for `ugen generate image` / `ugen generate video`.
const generate = program.command('generate').description('콘텐츠 생성');

// `ugen auth`: store a provider token and report where it was saved.
program
    .command('auth')
    .description('provider 토큰을 비밀번호 입력으로 저장')
    .requiredOption('-p, --provider <provider>', 'gemini|openai|grok')
    .action(async (opts) => {
        const provider = assertProvider(opts.provider);
        await authAndStoreToken(provider);
        console.log(`저장 완료: ${provider} 토큰`);
        console.log(`저장 위치: ${getTokenFilePath()}`);
    });

// `ugen generate image`: build the request from CLI flags and print the result.
generate
    .command('image')
    .description('이미지 생성')
    .requiredOption('-p, --provider <provider>', 'gemini|openai|grok')
    .requiredOption('-m, --model <model>', '모델 ID')
    .requiredOption('--part <part...>', '순서 보장 입력. 예: text:고양이 image:./cat.png text:배경은 숲')
    .option('-o, --out <dir>', '출력 폴더', './outputs')
    .option('--option <pair...>', '모델 옵션 key=value. 예: quality=high size=1024x1024')
    .option('--options-json <json>', '모델 옵션 JSON 문자열')
    .action(async (opts) => {
        const provider = assertProvider(opts.provider);
        const apiKey = await resolveApiKey(provider);
        const parts = parseParts(opts.part);
        const options = mergeOptions(parseOptionPairs(opts.option), opts.optionsJson);
        const result = await generateImage({
            provider,
            model: normalizeModel(opts.model),
            parts,
            outDir: opts.out,
            options
        }, apiKey);
        printResult(result);
    });

// `ugen generate video`: same as image, plus polling interval and timeout.
generate
    .command('video')
    .description('동영상 생성')
    .requiredOption('-p, --provider <provider>', 'gemini|openai|grok')
    .requiredOption('-m, --model <model>', '모델 ID')
    .requiredOption('--part <part...>', '순서 보장 입력. 예: text:광고 영상 image:./first-frame.png')
    .option('-o, --out <dir>', '출력 폴더', './outputs')
    .option('--option <pair...>', '모델 옵션 key=value. 예: seconds=8 size=1280x720')
    .option('--options-json <json>', '모델 옵션 JSON 문자열')
    .option('--poll-interval-ms <ms>', '상태 폴링 주기(ms)', '5000')
    .option('--timeout-ms <ms>', '타임아웃(ms)', '900000')
    .action(async (opts) => {
        const provider = assertProvider(opts.provider);
        // Validate timing flags before resolving the API key so a bad flag
        // fails before any key lookup.
        const pollIntervalMs = parsePositiveMs(opts.pollIntervalMs, '--poll-interval-ms');
        const timeoutMs = parsePositiveMs(opts.timeoutMs, '--timeout-ms');
        const apiKey = await resolveApiKey(provider);
        const parts = parseParts(opts.part);
        const options = mergeOptions(parseOptionPairs(opts.option), opts.optionsJson);
        const result = await generateVideo({
            provider,
            model: normalizeModel(opts.model),
            parts,
            outDir: opts.out,
            options,
            pollIntervalMs,
            timeoutMs
        }, apiKey);
        printResult(result);
    });

// Any error thrown by a command action ends up here: print it and exit non-zero.
program.parseAsync(process.argv).catch((error) => {
    const message = error instanceof Error ? error.message : String(error);
    console.error(`오류: ${message}`);
    process.exit(1);
});

/**
 * Converts a millisecond CLI flag to a number, rejecting unusable values.
 *
 * A NaN timeout would make the providers' `Date.now() - started > timeoutMs`
 * check permanently false, so video polling could never time out.
 *
 * @param {string} raw - Raw option value as received from the command line.
 * @param {string} flag - Flag name used in the error message.
 * @returns {number} A finite number greater than zero.
 * @throws {Error} When the value is not a finite number greater than zero.
 */
function parsePositiveMs(raw, flag) {
    const value = Number(raw);
    if (!Number.isFinite(value) || value <= 0) {
        throw new Error(`${flag} 값은 0보다 큰 숫자여야 합니다: ${raw}`);
    }
    return value;
}
|
|
99
|
+
/**
 * Checks that a provider name is one of the supported identifiers.
 *
 * @param {unknown} input - Value received from the `--provider` flag.
 * @returns {string} The same value, when it is 'gemini', 'openai' or 'grok'.
 * @throws {Error} When the value is anything else (matching is case-sensitive).
 */
function assertProvider(input) {
    const supported = ['gemini', 'openai', 'grok'];
    if (!supported.includes(input)) {
        throw new Error(`지원하지 않는 provider: ${input}`);
    }
    return input;
}
|
|
105
|
+
/**
 * Prints a generation result to stdout.
 *
 * Output is a summary line, then either one line per written file or a
 * notice that nothing was written, then the metadata as JSON when present.
 *
 * @param {{provider: string, modality: string, model: string, outputs: string[], meta?: object}} result
 */
function printResult(result) {
    const { provider, modality, model } = result;
    console.log(`완료: ${provider}/${modality}/${model}`);

    const files = result.outputs;
    if (files.length !== 0) {
        for (const file of files) {
            console.log(`- ${file}`);
        }
    }
    else {
        console.log('출력 파일이 없습니다. (모델 응답에 바이너리 결과 미포함)');
    }

    if (!result.meta) {
        return;
    }
    console.log(`meta: ${JSON.stringify(result.meta)}`);
}
|
|
119
|
+
/**
 * Maps a user-supplied model ID onto the catalog's spelling.
 *
 * The comparison is case-insensitive. IDs that are not in the catalog are
 * returned unchanged.
 *
 * @param {string} model - Model ID from the `--model` flag.
 * @returns {string} The catalog's model ID, or the input when no entry matches.
 */
function normalizeModel(model) {
    const wanted = model.toLowerCase();
    for (const entry of MODEL_CATALOG) {
        if (entry.model.toLowerCase() === wanted) {
            return entry.model;
        }
    }
    return model;
}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
// Option keys shared by several catalog entries. Each entry receives its own
// copy so that no two entries share one array instance.
const GEMINI_CONTENT_IMAGE_OPTIONS = ['responseModalities', 'temperature', 'topP', 'topK', 'seed'];
const IMAGEN_OPTIONS = ['negativePrompt', 'numberOfImages', 'aspectRatio', 'guidanceScale', 'seed', 'outputMimeType', 'imageSize', 'enhancePrompt'];
const OPENAI_IMAGE_OPTIONS = ['background', 'moderation', 'n', 'output_format', 'output_compression', 'quality', 'size', 'stream'];
const GROK_IMAGE_OPTIONS = ['n', 'output_format', 'quality', 'size'];
const VEO_OPTIONS = ['numberOfVideos', 'fps', 'durationSeconds', 'seed', 'aspectRatio', 'resolution', 'negativePrompt', 'enhancePrompt', 'generateAudio'];
const OPENAI_LIKE_VIDEO_OPTIONS = ['seconds', 'size'];
const XAI_NOTE = 'xAI는 OpenAI 호환 SDK 경로를 사용';

/**
 * Builds one catalog entry. Every listed model accepts text input; `notes`
 * is only attached when one is given.
 */
function catalogEntry(provider, modality, model, supportsImageInput, options, notes) {
    const entry = {
        provider,
        modality,
        model,
        supportsText: true,
        supportsImageInput,
        options: [...options]
    };
    if (notes !== undefined) {
        entry.notes = notes;
    }
    return entry;
}

/** Supported models with their accepted input kinds and option keys. */
export const MODEL_CATALOG = [
    // Image models
    catalogEntry('gemini', 'image', 'gemini-3-pro-image-preview', true, GEMINI_CONTENT_IMAGE_OPTIONS),
    catalogEntry('gemini', 'image', 'gemini-2.5-flash-image-preview', true, GEMINI_CONTENT_IMAGE_OPTIONS),
    catalogEntry('gemini', 'image', 'imagen-4.0-generate-001', false, IMAGEN_OPTIONS),
    catalogEntry('gemini', 'image', 'imagen-4.0-ultra-generate-001', false, IMAGEN_OPTIONS),
    catalogEntry('gemini', 'image', 'imagen-4.0-fast-generate-001', false, IMAGEN_OPTIONS),
    catalogEntry('openai', 'image', 'gpt-image-1.5', true, OPENAI_IMAGE_OPTIONS),
    catalogEntry('openai', 'image', 'gpt-image-1', true, [...OPENAI_IMAGE_OPTIONS, 'input_fidelity']),
    catalogEntry('openai', 'image', 'gpt-image-1-mini', true, OPENAI_IMAGE_OPTIONS),
    catalogEntry('grok', 'image', 'grok-imagine-image', true, GROK_IMAGE_OPTIONS, XAI_NOTE),
    catalogEntry('grok', 'image', 'grok-imagine-image-pro', true, GROK_IMAGE_OPTIONS, XAI_NOTE),
    // Video models
    catalogEntry('gemini', 'video', 'veo-3.1-generate-preview', true, VEO_OPTIONS),
    catalogEntry('gemini', 'video', 'veo-3.1-fast-generate-preview', true, VEO_OPTIONS),
    catalogEntry('openai', 'video', 'sora-2', true, OPENAI_LIKE_VIDEO_OPTIONS),
    catalogEntry('openai', 'video', 'sora-2-pro', true, OPENAI_LIKE_VIDEO_OPTIONS),
    catalogEntry('grok', 'video', 'grok-imagine-video', true, OPENAI_LIKE_VIDEO_OPTIONS, XAI_NOTE)
];
|
|
126
|
+
/**
 * Looks up a catalog entry by exact provider, modality and model ID.
 *
 * @param {string} provider
 * @param {string} modality
 * @param {string} model
 * @returns {object | undefined} The first matching entry, or undefined when none matches.
 */
export function getModelItem(provider, modality, model) {
    for (const entry of MODEL_CATALOG) {
        const sameProvider = entry.provider === provider;
        if (sameProvider && entry.modality === modality && entry.model === model) {
            return entry;
        }
    }
    return undefined;
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { generateImageWithGemini, generateVideoWithGemini } from '../providers/gemini.js';
|
|
2
|
+
import { generateImageWithOpenAILike, generateVideoWithOpenAILike } from '../providers/openaiLike.js';
|
|
3
|
+
/**
 * Routes an image request to the implementation for its provider.
 *
 * 'gemini' uses the Gemini provider and 'openai' the OpenAI-compatible one.
 * Any other provider value is sent to the OpenAI-compatible provider
 * configured for grok at the xAI base URL.
 *
 * @param {object} req - Image generation request; `req.provider` selects the route.
 * @param {string} apiKey - API key for the selected provider.
 * @returns {Promise<object>} Result returned by the provider implementation.
 */
export async function generateImage(req, apiKey) {
    switch (req.provider) {
        case 'gemini':
            return generateImageWithGemini(req, apiKey);
        case 'openai':
            return generateImageWithOpenAILike(req, { provider: 'openai', apiKey });
        default: {
            const grokConfig = {
                provider: 'grok',
                apiKey,
                baseURL: 'https://api.x.ai/v1'
            };
            return generateImageWithOpenAILike(req, grokConfig);
        }
    }
}
|
|
16
|
+
/**
 * Routes a video request to the implementation for its provider.
 *
 * 'gemini' uses the Gemini provider and 'openai' the OpenAI-compatible one.
 * Any other provider value is sent to the OpenAI-compatible provider
 * configured for grok at the xAI base URL.
 *
 * @param {object} req - Video generation request; `req.provider` selects the route.
 * @param {string} apiKey - API key for the selected provider.
 * @returns {Promise<object>} Result returned by the provider implementation.
 */
export async function generateVideo(req, apiKey) {
    switch (req.provider) {
        case 'gemini':
            return generateVideoWithGemini(req, apiKey);
        case 'openai':
            return generateVideoWithOpenAILike(req, { provider: 'openai', apiKey });
        default: {
            const grokConfig = {
                provider: 'grok',
                apiKey,
                baseURL: 'https://api.x.ai/v1'
            };
            return generateVideoWithOpenAILike(req, grokConfig);
        }
    }
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { GoogleGenAI, createPartFromUri } from '@google/genai';
|
|
4
|
+
import { detectMimeTypeFromPath, fileExists, writeBase64File } from '../utils/io.js';
|
|
5
|
+
import { imageParts, joinTextParts } from '../utils/parts.js';
|
|
6
|
+
/**
 * Generates images with a Gemini-family model and writes them to `req.outDir`.
 *
 * Models whose ID starts with `imagen-` go through `generateImages` and accept
 * text only. All other models go through `generateContent`, with text and
 * uploaded image parts sent in the order given.
 *
 * @param {object} req - Request with `model`, `parts`, `options` and `outDir`.
 * @param {string} apiKey - Gemini API key.
 * @returns {Promise<object>} `{ provider, model, modality, outputs }`, where `outputs` lists written file paths.
 * @throws {Error} On missing text for Imagen, image input for Imagen, a missing
 *   input file, or an upload result without `uri`/`mimeType`.
 */
export async function generateImageWithGemini(req, apiKey) {
    // Maps a MIME type to a file extension; anything unrecognised becomes png.
    const extensionFor = (mimeType) => {
        switch (mimeType) {
            case 'image/jpeg':
                return 'jpg';
            case 'image/webp':
                return 'webp';
            default:
                return 'png';
        }
    };

    const ai = new GoogleGenAI({ apiKey });
    const prompt = joinTextParts(req.parts);
    const inputImages = imageParts(req.parts);
    const saved = [];

    if (req.model.startsWith('imagen-')) {
        if (!prompt) {
            throw new Error('Imagen 계열은 text 파트가 필요합니다.');
        }
        if (inputImages.length > 0) {
            throw new Error(`${req.model}은 이 CLI에서 text-to-image만 지원합니다. (image 입력 불가)`);
        }
        const imagenResponse = await ai.models.generateImages({
            model: req.model,
            prompt,
            config: req.options
        });
        for (const generated of imagenResponse.generatedImages ?? []) {
            const encoded = generated.image?.imageBytes;
            if (encoded) {
                const extension = extensionFor(req.options.outputMimeType ?? 'image/png');
                saved.push(await writeBase64File(encoded, req.outDir, 'gemini-image', extension));
            }
        }
        return { provider: 'gemini', model: req.model, modality: 'image', outputs: saved };
    }

    // Build the request parts in input order, uploading image files as they are reached.
    const requestParts = [];
    for (const inputPart of req.parts) {
        if (inputPart.type === 'text') {
            requestParts.push({ text: inputPart.value });
        }
        else {
            const filePath = inputPart.value;
            if (!(await fileExists(filePath))) {
                throw new Error(`이미지 파일을 찾을 수 없습니다: ${filePath}`);
            }
            const upload = await ai.files.upload({ file: filePath });
            if (!upload.uri || !upload.mimeType) {
                throw new Error(`Gemini 파일 업로드 결과에 uri/mimeType이 없습니다: ${filePath}`);
            }
            requestParts.push(createPartFromUri(upload.uri, upload.mimeType));
        }
    }

    const contentResponse = await ai.models.generateContent({
        model: req.model,
        contents: [{ role: 'user', parts: requestParts }],
        config: {
            // Default; a responseModalities key in req.options overrides it.
            responseModalities: ['IMAGE', 'TEXT'],
            ...req.options
        }
    });

    // Save every inline-data part found in any candidate.
    for (const candidate of contentResponse.candidates ?? []) {
        for (const responsePart of candidate?.content?.parts ?? []) {
            const encoded = responsePart?.inlineData?.data;
            const mimeType = responsePart?.inlineData?.mimeType;
            if (encoded) {
                saved.push(await writeBase64File(encoded, req.outDir, 'gemini-image', extensionFor(mimeType)));
            }
        }
    }
    return { provider: 'gemini', model: req.model, modality: 'image', outputs: saved };
}
|
|
71
|
+
/**
 * Generates video with a Gemini video model and downloads the results.
 *
 * Starts a `generateVideos` operation, polls it every `req.pollIntervalMs`
 * until it reports `done` or `req.timeoutMs` has elapsed, then downloads each
 * generated video into `req.outDir`.
 *
 * @param {object} req - Request with `model`, `parts`, `options`, `outDir`,
 *   `pollIntervalMs` and `timeoutMs`.
 * @param {string} apiKey - Gemini API key.
 * @returns {Promise<object>} `{ provider, model, modality, outputs, meta }`, where
 *   `outputs` lists downloaded file paths and `meta.operationName` is the operation name.
 * @throws {Error} When no text or image is given, more than one image is given,
 *   the image file is missing, polling times out, or the operation returns no videos.
 */
export async function generateVideoWithGemini(req, apiKey) {
    const ai = new GoogleGenAI({ apiKey });
    const text = joinTextParts(req.parts);
    const images = imageParts(req.parts);
    if (!text && images.length === 0) {
        throw new Error('Gemini 비디오는 text 또는 image 입력이 필요합니다.');
    }
    if (images.length > 1) {
        throw new Error('Gemini 비디오는 현재 이미지 입력 1개만 지원합니다.');
    }

    // The source carries the prompt and/or a single inline base64 image.
    const source = {};
    if (text) {
        source.prompt = text;
    }
    if (images[0]) {
        if (!(await fileExists(images[0]))) {
            throw new Error(`이미지 파일을 찾을 수 없습니다: ${images[0]}`);
        }
        const bytes = await fs.readFile(images[0]);
        source.image = {
            imageBytes: bytes.toString('base64'),
            mimeType: await detectMimeTypeFromPath(images[0])
        };
    }

    let operation = await ai.models.generateVideos({
        model: req.model,
        source,
        config: req.options
    });

    // Poll until the operation completes or the deadline passes.
    const started = Date.now();
    while (!operation.done) {
        if (Date.now() - started > req.timeoutMs) {
            throw new Error('Gemini 비디오 생성 대기 시간이 초과되었습니다. --timeout-ms를 늘려주세요.');
        }
        await new Promise((resolve) => setTimeout(resolve, req.pollIntervalMs));
        operation = await ai.operations.getVideosOperation({ operation });
    }

    const generated = operation.response?.generatedVideos ?? [];
    if (generated.length === 0) {
        throw new Error('Gemini 비디오 생성 결과가 비어 있습니다.');
    }

    // The download writes straight to downloadPath, so the output directory
    // must exist first (for example on a first run with the default ./outputs).
    await fs.mkdir(req.outDir, { recursive: true });

    const outputs = [];
    for (let i = 0; i < generated.length; i += 1) {
        const downloadPath = path.resolve(req.outDir, `gemini-video-${Date.now()}-${i + 1}.mp4`);
        await ai.files.download({ file: generated[i], downloadPath });
        outputs.push(downloadPath);
    }
    return {
        provider: 'gemini',
        model: req.model,
        modality: 'video',
        outputs,
        meta: { operationName: operation.name }
    };
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import OpenAI from 'openai';
|
|
3
|
+
import { imageParts, joinTextParts } from '../utils/parts.js';
|
|
4
|
+
import { fileExists, writeBase64File, writeBufferFile } from '../utils/io.js';
|
|
5
|
+
/**
 * Creates an OpenAI SDK client from a provider configuration.
 *
 * @param {{apiKey: string, baseURL?: string}} config - `baseURL` is passed through as given.
 * @returns {OpenAI} Client built from the given API key and base URL.
 */
function makeClient(config) {
    const { apiKey, baseURL } = config;
    return new OpenAI({ apiKey, baseURL });
}
|
|
11
|
+
/**
 * Generates images through an OpenAI-compatible API and writes them to `req.outDir`.
 *
 * Without image parts it calls `images.generate`; with image parts it opens
 * the files as streams and calls `images.edit`. Only results carrying
 * `b64_json` are saved; others are skipped.
 *
 * @param {object} req - Request with `model`, `parts`, `options` and `outDir`.
 * @param {{provider: string, apiKey: string, baseURL?: string}} config - Client configuration.
 * @returns {Promise<object>} `{ provider, model, modality, outputs }`, where `outputs` lists written file paths.
 * @throws {Error} When no text part is supplied or an input image file is missing.
 */
export async function generateImageWithOpenAILike(req, config) {
    const client = makeClient(config);
    const prompt = joinTextParts(req.parts);
    const referencePaths = imageParts(req.parts);
    if (!prompt) {
        throw new Error('이미지 생성에는 최소 1개의 text 파트가 필요합니다.');
    }

    let response;
    if (referencePaths.length === 0) {
        // Text-only request.
        response = await client.images.generate({
            model: req.model,
            prompt,
            ...req.options
        });
    }
    else {
        // Reference images present: check each file, then send them all to the edit endpoint.
        const streams = [];
        for (const referencePath of referencePaths) {
            if (!(await fileExists(referencePath))) {
                throw new Error(`이미지 파일을 찾을 수 없습니다: ${referencePath}`);
            }
            streams.push(fs.createReadStream(referencePath));
        }
        response = await client.images.edit({
            model: req.model,
            prompt,
            image: streams,
            ...req.options
        });
    }

    const savedPaths = [];
    for (const item of response.data ?? []) {
        if (item.b64_json) {
            // The extension follows the requested output_format, with 'jpeg' written as 'jpg'.
            const extension = (req.options.output_format ?? 'png').replace('jpeg', 'jpg');
            savedPaths.push(await writeBase64File(item.b64_json, req.outDir, `${config.provider}-image`, extension));
        }
    }
    return { provider: config.provider, model: req.model, modality: 'image', outputs: savedPaths };
}
|
|
55
|
+
/**
 * Generates a video through an OpenAI-compatible API and saves it as an mp4.
 *
 * Creates a video job, polls it every `req.pollIntervalMs` until its status is
 * 'completed', then downloads the content into `req.outDir`.
 *
 * @param {object} req - Request with `model`, `parts`, `options`, `outDir`,
 *   `pollIntervalMs` and `timeoutMs`.
 * @param {{provider: string, apiKey: string, baseURL?: string}} config - Client configuration.
 * @returns {Promise<object>} `{ provider, model, modality, outputs, meta }`, where
 *   `outputs` holds the saved file path and `meta.jobId` is the job ID.
 * @throws {Error} When no text part is given, more than one image is given, the
 *   image file is missing, the job fails, or polling exceeds `req.timeoutMs`.
 */
export async function generateVideoWithOpenAILike(req, config) {
    const client = makeClient(config);
    const prompt = joinTextParts(req.parts);
    const referencePaths = imageParts(req.parts);
    if (!prompt) {
        throw new Error('동영상 생성에는 최소 1개의 text 파트가 필요합니다.');
    }
    if (referencePaths.length > 1) {
        throw new Error('OpenAI/xAI 비디오는 현재 이미지 참조 1개만 지원합니다.');
    }

    const payload = { model: req.model, prompt, ...req.options };
    const referencePath = referencePaths[0];
    if (referencePath) {
        if (!(await fileExists(referencePath))) {
            throw new Error(`이미지 파일을 찾을 수 없습니다: ${referencePath}`);
        }
        payload.input_reference = fs.createReadStream(referencePath);
    }

    let job = await client.videos.create(payload);
    const startedAt = Date.now();
    // Poll until completed. A failed status or an expired deadline aborts.
    for (;;) {
        if (job.status === 'completed') {
            break;
        }
        if (job.status === 'failed') {
            throw new Error(`비디오 생성 실패: ${job.error?.message ?? '알 수 없는 오류'}`);
        }
        const elapsed = Date.now() - startedAt;
        if (elapsed > req.timeoutMs) {
            throw new Error('비디오 생성 대기 시간이 초과되었습니다. --timeout-ms를 늘려주세요.');
        }
        await new Promise((resolve) => setTimeout(resolve, req.pollIntervalMs));
        job = await client.videos.retrieve(job.id);
    }

    const download = await client.videos.downloadContent(job.id, { variant: 'video' });
    const videoBytes = Buffer.from(await download.arrayBuffer());
    const savedPath = await writeBufferFile(videoBytes, req.outDir, `${config.provider}-video`, 'mp4');
    return {
        provider: config.provider,
        model: req.model,
        modality: 'video',
        outputs: [savedPath],
        meta: { jobId: job.id }
    };
}
|