s3_to_drive 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +43 -0
- data/Rakefile +8 -0
- data/lib/concerns/transfer_s3_to_drive/.env +22 -0
- data/lib/concerns/transfer_s3_to_drive/Dockerfile +14 -0
- data/lib/concerns/transfer_s3_to_drive/app/__init__.py +0 -0
- data/lib/concerns/transfer_s3_to_drive/app/__pycache__/main.cpython-312.pyc +0 -0
- data/lib/concerns/transfer_s3_to_drive/app/config.py +21 -0
- data/lib/concerns/transfer_s3_to_drive/app/db.py +18 -0
- data/lib/concerns/transfer_s3_to_drive/app/drive_upload.py +311 -0
- data/lib/concerns/transfer_s3_to_drive/app/drive_uploader.py +213 -0
- data/lib/concerns/transfer_s3_to_drive/app/kidsly-admin.code-workspace +8 -0
- data/lib/concerns/transfer_s3_to_drive/app/main.py +118 -0
- data/lib/concerns/transfer_s3_to_drive/app/models.py +38 -0
- data/lib/concerns/transfer_s3_to_drive/app/state_manager.py +160 -0
- data/lib/concerns/transfer_s3_to_drive/docker-compose.yml +20 -0
- data/lib/concerns/transfer_s3_to_drive/requirements.txt +20 -0
- data/lib/concerns/transfer_s3_to_drive/service_account.json +13 -0
- data/lib/s3_to_drive/version.rb +5 -0
- data/lib/s3_to_drive.rb +22 -0
- data/sig/s3_to_drive.rbs +4 -0
- metadata +68 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 8d05b41d7ec65ff0f7d8d3eaba76a06540fead2cc84c16f396d6d18554a6ba2f
|
|
4
|
+
data.tar.gz: 65fa074539590f5b70482da9b3680336349a0918b57df2e4e698833926c8c101
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: a7f87c5f4f829749e4e68c6f01ee3aafe65f3d0cb53ed09c0bcf5940e8c8eaecd571a719552a2cc7836fb88016e481173dad841649deb01ee30e97d0e6cdffb8
|
|
7
|
+
data.tar.gz: 36287a98a7811066c34252f040f44a186e7e6311a7deadfa10c8bf9751d8b1a1d899f3008858912f31223034ae9637b35317eab24eb86d617e5662d8d4b2d663
|
data/.rubocop.yml
ADDED
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 mirabo-hoang-lx
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# S3ToDrive
|
|
2
|
+
1. Test locally
|
|
3
|
+
- bin/console
|
|
4
|
+
- S3ToDrive.transfer('','','')
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
TODO: Delete this and the text below, and describe your gem
|
|
8
|
+
|
|
9
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/s3_to_drive`. To experiment with that code, run `bin/console` for an interactive prompt.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
|
|
14
|
+
|
|
15
|
+
Install the gem and add to the application's Gemfile by executing:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
bundle add UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
If bundler is not being used to manage dependencies, install the gem by executing:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
gem install UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
TODO: Write usage instructions here
|
|
30
|
+
|
|
31
|
+
## Development
|
|
32
|
+
|
|
33
|
+
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
34
|
+
|
|
35
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
36
|
+
|
|
37
|
+
## Contributing
|
|
38
|
+
|
|
39
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/s3_to_drive.
|
|
40
|
+
|
|
41
|
+
## License
|
|
42
|
+
|
|
43
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
MYSQL_HOST=host.docker.internal
|
|
2
|
+
MYSQL_PORT=3307
|
|
3
|
+
DB_NAME=kidsly_dev
|
|
4
|
+
DB_PASSWORD=eu7dGHDhqu
|
|
5
|
+
# MYSQL_DATABASE=kidsly_production
|
|
6
|
+
# MYSQL_PASSWORD=YQYIlmdYy4
|
|
7
|
+
MYSQL_USER=kidsuser
|
|
8
|
+
DATABASE_URL=mysql+pymysql://kidsuser:eu7dGHDhqu@host.docker.internal:3307/kidsly_dev
|
|
9
|
+
# DATABASE_URL=mysql+pymysql://kidsuser:YQYIlmdYy4@kidsly-production-masking-20250624.cnj5ipatayx0.ap-northeast-1.rds.amazonaws.com:3306/kidsly_production
|
|
10
|
+
|
|
11
|
+
# GOOGLE_CLIENT_ID=21003661994-ped8kp8p8niek3g4snnrhfgmhdlkajrm.apps.googleusercontent.com
|
|
12
|
+
# GOOGLE_CLIENT_SECRET=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
|
|
13
|
+
# GOOGLE_REFRESH_TOKEN=1//04K8m_kfKLJTmCgYIARAAGAQSNwF-L9Ir9eeCdYsyOGkJFbt7aKSfAtPBGvJThTzGgagFmxPa6Jqd6seqO1URpT_qnd6XojZifxc
|
|
14
|
+
GOOGLE_DRIVE_FOLDER_ID=1Jl8KhcpQ_KgqmQW7vkRwhstKSDGaWSZz
|
|
15
|
+
AWS_TRANSFER_S3_PRIVATE_CONTENTS_BUCKET=kidsly-dev-private-contents
|
|
16
|
+
|
|
17
|
+
AWS_ACCESS_KEY_ID=AKIA3VOL6WH7UI2CD4N7
|
|
18
|
+
AWS_SECRET_ACCESS_KEY=iK1SvyOVJdtv3PJLxbTEyLjhqbOpGEwE3xVrCh/d
|
|
19
|
+
AWS_S3_IMAGE_BUCKET=kidsly-dev-app
|
|
20
|
+
AWS_REGION=ap-northeast-1
|
|
21
|
+
|
|
22
|
+
SERVICE_ACCOUNT_FILE=/app/service_account.json
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
FROM python:3.11-slim-bookworm
|
|
2
|
+
|
|
3
|
+
WORKDIR /app
|
|
4
|
+
|
|
5
|
+
ENV PIP_PROGRESS_BAR=off
|
|
6
|
+
|
|
7
|
+
COPY requirements.txt .
|
|
8
|
+
RUN pip install --no-cache-dir -r requirements.txt
|
|
9
|
+
|
|
10
|
+
COPY app/ app/
|
|
11
|
+
COPY service_account.json .
|
|
12
|
+
|
|
13
|
+
CMD ["python", "-m", "app.main", "771", ""]
|
|
14
|
+
|
|
File without changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
# ---- Main database connection settings ----
# Every setting below is read from the process environment and is None when
# the corresponding variable is not set.
DB_HOST = os.environ.get("MYSQL_HOST")
DB_PORT = os.environ.get("MYSQL_PORT")
DB_NAME = os.environ.get("MYSQL_DATABASE")
DB_USER = os.environ.get("MYSQL_USER")
DB_PASSWORD = os.environ.get("MYSQL_PASSWORD")

# Full SQLAlchemy connection URL (consumed by app.db).
DATABASE_URL = os.environ.get("DATABASE_URL")

# ---- AWS ----
AWS_REGION = os.environ.get("AWS_REGION")
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
S3_BUCKET = os.environ.get("AWS_TRANSFER_S3_PRIVATE_CONTENTS_BUCKET")
SQS_QUEUE_URL = os.environ.get("AWS_SQS_QUEUE_URL")

# ---- Google Drive ----
SERVICE_ACCOUNT_FILE = os.environ.get("SERVICE_ACCOUNT_FILE")
DRIVE_FOLDER_ID = os.environ.get("GOOGLE_DRIVE_FOLDER_ID")
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from sqlalchemy import create_engine
|
|
2
|
+
from sqlalchemy.orm import sessionmaker
|
|
3
|
+
from app.config import DATABASE_URL
|
|
4
|
+
|
|
5
|
+
# ---- Main DB ----
# Module-level engine built from DATABASE_URL; created once at import time.
engine = create_engine(
    DATABASE_URL,
    pool_pre_ping=True,
    max_overflow=10,
    pool_size=5,  # same value as RAILS_MAX_THREADS
)

# Session factory bound to the engine above; autocommit and autoflush are
# both switched off.
SessionLocal = sessionmaker(autoflush=False, autocommit=False, bind=engine)
|
|
18
|
+
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import requests
|
|
3
|
+
from google.auth.transport.requests import Request
|
|
4
|
+
from google.oauth2 import service_account
|
|
5
|
+
from typing import Tuple
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class DriveUpload:
|
|
9
|
+
"""Google Drive へのアップロード処理を担当するクラス(resumable upload 対応)"""
|
|
10
|
+
|
|
11
|
+
# Configuration
|
|
12
|
+
CHUNK_SIZE = 256 * 1024 * 1024 # 256MB chunks
|
|
13
|
+
MAX_RETRIES = 5 # Max retries per chunk
|
|
14
|
+
SCOPES = ['https://www.googleapis.com/auth/drive.file']
|
|
15
|
+
|
|
16
|
+
def __init__(self, service_account_file: str):
|
|
17
|
+
"""
|
|
18
|
+
Drive API クライアントを初期化する。
|
|
19
|
+
|
|
20
|
+
[Input]
|
|
21
|
+
- service_account_file (str) : Google サービスアカウントの JSON ファイルパス
|
|
22
|
+
|
|
23
|
+
[Output / 副作用]
|
|
24
|
+
- self.credentials を設定する(アクセストークンの自動更新に対応)
|
|
25
|
+
"""
|
|
26
|
+
self.credentials = service_account.Credentials.from_service_account_file(
|
|
27
|
+
service_account_file,
|
|
28
|
+
scopes=self.SCOPES
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def find_folder(self, folder_name: str, parent_folder_id: str) -> str:
|
|
33
|
+
"""
|
|
34
|
+
親フォルダ内に同名フォルダが存在するか検索する。
|
|
35
|
+
|
|
36
|
+
[Input]
|
|
37
|
+
- folder_name (str) : 検索するフォルダ名
|
|
38
|
+
- parent_folder_id (str) : 親フォルダの Google Drive ID
|
|
39
|
+
|
|
40
|
+
[Output]
|
|
41
|
+
- str : 見つかった場合はフォルダ ID を返す
|
|
42
|
+
- None : 見つからない場合は None を返す
|
|
43
|
+
|
|
44
|
+
[処理フロー]
|
|
45
|
+
1. サービスアカウントのアクセストークンを更新する
|
|
46
|
+
2. Drive API v3 Files.list を使ってフォルダ名・親 ID・削除フラグで絞り込む
|
|
47
|
+
3. 最初にヒットしたフォルダの ID を返す(複数存在する場合は先頭のみ使用)
|
|
48
|
+
"""
|
|
49
|
+
# Get access token
|
|
50
|
+
self.credentials.refresh(Request())
|
|
51
|
+
access_token = self.credentials.token
|
|
52
|
+
|
|
53
|
+
# Search for folder
|
|
54
|
+
headers = {
|
|
55
|
+
'Authorization': f'Bearer {access_token}',
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
# Query to find folder with exact name in parent folder
|
|
59
|
+
query = f"name='{folder_name}' and mimeType='application/vnd.google-apps.folder' and '{parent_folder_id}' in parents and trashed=false"
|
|
60
|
+
|
|
61
|
+
params = {
|
|
62
|
+
'q': query,
|
|
63
|
+
'fields': 'files(id, name)',
|
|
64
|
+
'supportsAllDrives': 'true',
|
|
65
|
+
'includeItemsFromAllDrives': 'true'
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
response = requests.get(
|
|
69
|
+
'https://www.googleapis.com/drive/v3/files',
|
|
70
|
+
headers=headers,
|
|
71
|
+
params=params
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
if response.status_code != 200:
|
|
75
|
+
print(f"[WARNING] Failed to search for folder: {response.status_code}")
|
|
76
|
+
return None
|
|
77
|
+
|
|
78
|
+
data = response.json()
|
|
79
|
+
files = data.get('files', [])
|
|
80
|
+
|
|
81
|
+
if files:
|
|
82
|
+
folder_id = files[0]['id']
|
|
83
|
+
print(f"[DRIVE] Found existing folder: {folder_name} (ID: {folder_id})")
|
|
84
|
+
return folder_id
|
|
85
|
+
|
|
86
|
+
return None
|
|
87
|
+
|
|
88
|
+
def create_folder(self, folder_name: str, parent_folder_id: str) -> str:
|
|
89
|
+
"""
|
|
90
|
+
Google Drive 上に新しいフォルダを作成する。
|
|
91
|
+
|
|
92
|
+
[Input]
|
|
93
|
+
- folder_name (str) : 作成するフォルダ名
|
|
94
|
+
- parent_folder_id (str) : 親フォルダの Google Drive ID
|
|
95
|
+
|
|
96
|
+
[Output]
|
|
97
|
+
- str : 作成されたフォルダの Google Drive ID
|
|
98
|
+
|
|
99
|
+
[処理フロー]
|
|
100
|
+
1. サービスアカウントのアクセストークンを更新する
|
|
101
|
+
2. Drive API v3 Files.create を呼び出し、フォルダのメタデータを設定する
|
|
102
|
+
- mimeType: 'application/vnd.google-apps.folder'
|
|
103
|
+
- parents: [parent_folder_id]
|
|
104
|
+
3. レスポンスからフォルダ ID を取得して返す
|
|
105
|
+
4. HTTP 200/201 以外の場合は例外を送出する
|
|
106
|
+
"""
|
|
107
|
+
# Get access token
|
|
108
|
+
self.credentials.refresh(Request())
|
|
109
|
+
access_token = self.credentials.token
|
|
110
|
+
|
|
111
|
+
# Metadata for folder
|
|
112
|
+
metadata = {
|
|
113
|
+
'name': folder_name,
|
|
114
|
+
'mimeType': 'application/vnd.google-apps.folder',
|
|
115
|
+
'parents': [parent_folder_id]
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
# Create folder
|
|
119
|
+
headers = {
|
|
120
|
+
'Authorization': f'Bearer {access_token}',
|
|
121
|
+
'Content-Type': 'application/json'
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
response = requests.post(
|
|
125
|
+
'https://www.googleapis.com/drive/v3/files',
|
|
126
|
+
headers=headers,
|
|
127
|
+
json=metadata,
|
|
128
|
+
params={'supportsAllDrives': 'true'}
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
if response.status_code not in [200, 201]:
|
|
132
|
+
raise Exception(f"Failed to create folder: {response.status_code} {response.text}")
|
|
133
|
+
|
|
134
|
+
folder_data = response.json()
|
|
135
|
+
folder_id = folder_data.get('id')
|
|
136
|
+
|
|
137
|
+
print(f"[DRIVE] Created folder: {folder_name} (ID: {folder_id})")
|
|
138
|
+
|
|
139
|
+
return folder_id
|
|
140
|
+
|
|
141
|
+
def create_resumable_upload(self, filename: str, folder_id: str,
|
|
142
|
+
file_size: int) -> str:
|
|
143
|
+
"""
|
|
144
|
+
Google Drive の resumable upload セッションを作成する。
|
|
145
|
+
|
|
146
|
+
[Input]
|
|
147
|
+
- filename (str) : アップロードするファイル名
|
|
148
|
+
- folder_id (str) : アップロード先フォルダの Google Drive ID
|
|
149
|
+
- file_size (int) : ファイルの総バイト数
|
|
150
|
+
|
|
151
|
+
[Output]
|
|
152
|
+
- str : resumable upload URL(以降のチャンクアップロードに使用する)
|
|
153
|
+
|
|
154
|
+
[処理フロー]
|
|
155
|
+
1. サービスアカウントのアクセストークンを更新する
|
|
156
|
+
2. Drive API の uploadType=resumable エンドポイントに POST リクエストを送る
|
|
157
|
+
- X-Upload-Content-Length: file_size を指定してファイルサイズを宣言する
|
|
158
|
+
3. レスポンスの Location ヘッダから upload_url を取得して返す
|
|
159
|
+
4. HTTP 200 以外または Location がない場合は例外を送出する
|
|
160
|
+
"""
|
|
161
|
+
# Get access token
|
|
162
|
+
self.credentials.refresh(Request())
|
|
163
|
+
access_token = self.credentials.token
|
|
164
|
+
|
|
165
|
+
# Metadata
|
|
166
|
+
metadata = {
|
|
167
|
+
'name': filename,
|
|
168
|
+
'parents': [folder_id]
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
# Initiate resumable upload
|
|
172
|
+
headers = {
|
|
173
|
+
'Authorization': f'Bearer {access_token}',
|
|
174
|
+
'Content-Type': 'application/json; charset=UTF-8',
|
|
175
|
+
'X-Upload-Content-Type': 'application/octet-stream',
|
|
176
|
+
'X-Upload-Content-Length': str(file_size)
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
response = requests.post(
|
|
180
|
+
'https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable',
|
|
181
|
+
headers=headers,
|
|
182
|
+
json=metadata,
|
|
183
|
+
params={'supportsAllDrives': 'true'}
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
if response.status_code != 200:
|
|
187
|
+
raise Exception(f"Failed to create upload session: {response.status_code} {response.text}")
|
|
188
|
+
|
|
189
|
+
upload_url = response.headers.get('Location')
|
|
190
|
+
if not upload_url:
|
|
191
|
+
raise Exception("No upload URL in response")
|
|
192
|
+
|
|
193
|
+
print(f"[DRIVE] Created resumable upload session")
|
|
194
|
+
print(f"[DRIVE] Upload URL: {upload_url[:80]}...")
|
|
195
|
+
|
|
196
|
+
return upload_url
|
|
197
|
+
|
|
198
|
+
def get_upload_status(self, upload_url: str, file_size: int) -> int:
|
|
199
|
+
"""
|
|
200
|
+
現在のアップロード進捗バイト数を Google Drive に問い合わせる。
|
|
201
|
+
|
|
202
|
+
[Input]
|
|
203
|
+
- upload_url (str) : resumable upload の URL
|
|
204
|
+
- file_size (int) : ファイルの総バイト数
|
|
205
|
+
|
|
206
|
+
[Output]
|
|
207
|
+
- int : これまでにアップロード済みのバイト数
|
|
208
|
+
(情報が取得できない場合は 0 を返す)
|
|
209
|
+
|
|
210
|
+
[処理フロー]
|
|
211
|
+
1. Content-Range: bytes */file_size で PUT リクエストを送信する
|
|
212
|
+
2. HTTP 308 (Resume Incomplete) の場合、Range ヘッダから進捗バイト数を計算する
|
|
213
|
+
3. それ以外の場合は 0 を返す
|
|
214
|
+
"""
|
|
215
|
+
headers = {
|
|
216
|
+
'Content-Range': f'bytes */{file_size}',
|
|
217
|
+
'Content-Length': '0'
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
response = requests.put(upload_url, headers=headers)
|
|
221
|
+
|
|
222
|
+
if response.status_code == 308:
|
|
223
|
+
# Resume incomplete
|
|
224
|
+
range_header = response.headers.get('Range', '')
|
|
225
|
+
if range_header:
|
|
226
|
+
# Format: "bytes=0-12345"
|
|
227
|
+
uploaded = int(range_header.split('-')[1]) + 1
|
|
228
|
+
print(f"[DRIVE] Current upload status: {uploaded:,} bytes")
|
|
229
|
+
return uploaded
|
|
230
|
+
|
|
231
|
+
return 0
|
|
232
|
+
|
|
233
|
+
def upload_chunk(self, chunk_data: bytes, offset: int,
|
|
234
|
+
file_size: int, upload_url: str) -> Tuple[bool, int]:
|
|
235
|
+
"""
|
|
236
|
+
単一チャンクを Google Drive へアップロードする(429 リトライあり)。
|
|
237
|
+
|
|
238
|
+
[Input]
|
|
239
|
+
- chunk_data (bytes) : アップロードするチャンクのバイナリデータ
|
|
240
|
+
- offset (int) : このチャンクの開始バイト位置(0 始まり)
|
|
241
|
+
- file_size (int) : ファイルの総バイト数
|
|
242
|
+
- upload_url (str) : resumable upload の URL
|
|
243
|
+
|
|
244
|
+
[Output]
|
|
245
|
+
- Tuple[bool, int]
|
|
246
|
+
- bool : アップロード成功かどうか(True: 成功)
|
|
247
|
+
- int : アップロード後の累計バイト数
|
|
248
|
+
ファイル完了時は file_size、継続中は end_byte + 1
|
|
249
|
+
|
|
250
|
+
[処理フロー]
|
|
251
|
+
MAX_RETRIES 回まで以下をリトライする:
|
|
252
|
+
1. Content-Range: bytes {offset}-{end_byte}/{file_size} を指定して PUT リクエストを送信する
|
|
253
|
+
2. HTTP 200/201 → アップロード完了。(True, file_size) を返す
|
|
254
|
+
3. HTTP 308 → チャンクのみ完了(継続中)。(True, end_byte + 1) を返す
|
|
255
|
+
4. HTTP 429 → レート制限。指数バックオフ(最大 60 秒)でリトライする
|
|
256
|
+
5. その他エラー → 例外を送出する
|
|
257
|
+
MAX_RETRIES 超過時: Exception を送出する
|
|
258
|
+
"""
|
|
259
|
+
chunk_size = len(chunk_data)
|
|
260
|
+
end_byte = offset + chunk_size - 1
|
|
261
|
+
|
|
262
|
+
for attempt in range(self.MAX_RETRIES):
|
|
263
|
+
try:
|
|
264
|
+
headers = {
|
|
265
|
+
'Content-Range': f'bytes {offset}-{end_byte}/{file_size}',
|
|
266
|
+
'Content-Length': str(chunk_size)
|
|
267
|
+
}
|
|
268
|
+
|
|
269
|
+
response = requests.put(
|
|
270
|
+
upload_url,
|
|
271
|
+
data=chunk_data,
|
|
272
|
+
headers=headers,
|
|
273
|
+
timeout=300 # 5 minutes timeout
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
# Success
|
|
277
|
+
if response.status_code in [200, 201]:
|
|
278
|
+
print(f"[DRIVE] ✅ Upload complete!")
|
|
279
|
+
return True, file_size
|
|
280
|
+
|
|
281
|
+
elif response.status_code == 308:
|
|
282
|
+
# Resume incomplete - chunk uploaded successfully
|
|
283
|
+
print(f"[DRIVE] ✅ Chunk uploaded: {offset:,} - {end_byte:,}")
|
|
284
|
+
return True, end_byte + 1
|
|
285
|
+
|
|
286
|
+
elif response.status_code == 429:
|
|
287
|
+
# Rate limit - retry with exponential backoff
|
|
288
|
+
wait_time = min(2 ** attempt, 60) # Max 60s
|
|
289
|
+
print(f"[429] Retry {attempt + 1}/{self.MAX_RETRIES}, "
|
|
290
|
+
f"wait {wait_time}s, offset {offset:,}")
|
|
291
|
+
time.sleep(wait_time)
|
|
292
|
+
continue
|
|
293
|
+
|
|
294
|
+
else:
|
|
295
|
+
# Other error
|
|
296
|
+
print(f"[ERROR] Upload failed: HTTP {response.status_code}")
|
|
297
|
+
print(f"[ERROR] Response: {response.text[:200]}")
|
|
298
|
+
raise Exception(f"Upload failed: {response.status_code}")
|
|
299
|
+
|
|
300
|
+
except Exception as e:
|
|
301
|
+
print(f"[ERROR] Exception during upload: {e}")
|
|
302
|
+
if attempt < self.MAX_RETRIES - 1:
|
|
303
|
+
wait_time = min(2 ** attempt, 10)
|
|
304
|
+
print(f"[RETRY] Retry {attempt + 1}/{self.MAX_RETRIES}, wait {wait_time}s")
|
|
305
|
+
time.sleep(wait_time)
|
|
306
|
+
continue
|
|
307
|
+
else:
|
|
308
|
+
raise
|
|
309
|
+
|
|
310
|
+
# Max retries exceeded
|
|
311
|
+
raise Exception(f"Max retries ({self.MAX_RETRIES}) exceeded for chunk at offset {offset}")
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import os
|
|
3
|
+
import sys
|
|
4
|
+
import boto3
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from app.state_manager import StateManager
|
|
7
|
+
from app.config import SERVICE_ACCOUNT_FILE
|
|
8
|
+
from app.drive_upload import DriveUpload
|
|
9
|
+
|
|
10
|
+
def stream_s3_to_drive(s3_bucket: str, s3_key: str, drive_folder_id: str, folder_name: str):
    """Stream one S3 object to Google Drive in chunks, resuming when possible.

    Args:
        s3_bucket: Source S3 bucket name.
        s3_key: Source S3 object key (e.g. "123/batch_downloads/file.zip").
        drive_folder_id: Google Drive ID of the parent folder.
        folder_name: Sub-folder to upload into; reused if it already exists
            under ``drive_folder_id``, otherwise created.

    Side effects:
        - Uploads the object into ``folder_name`` on Drive.
        - Persists progress in ``state.json`` after every chunk and deletes
          the file once the transfer completes.
        - Calls ``sys.exit(1)`` when the S3 lookup, the upload-session setup
          or a chunk transfer fails.

    Flow:
        1. Initialise StateManager / DriveUpload / the S3 client.
        2. Read size and file name of the object via ``head_object``.
        3. Load ``state.json``; resume only if its ``s3_key`` and
           ``file_size`` match, otherwise discard it and start fresh
           (find/create folder, open a resumable session, save initial state).
        4. While ``offset < file_size``: ranged read from S3, upload the
           chunk, save progress, log speed and ETA.
        5. Delete ``state.json``.
    """
    import time  # local import: monotonic clock for elapsed-time measurements

    mib = 1024 ** 2

    # Initialize components
    state_mgr = StateManager('state.json')
    uploader = DriveUpload(service_account_file=SERVICE_ACCOUNT_FILE)
    s3_client = boto3.client('s3')

    # Get file info from S3
    print(f"\n{'='*60}")
    print(f"S3 to Google Drive Transfer")
    print(f"{'='*60}")
    print(f"S3 Bucket: {s3_bucket}")
    print(f"S3 Key: {s3_key}")
    print(f"Drive Folder: {drive_folder_id}")

    try:
        head_response = s3_client.head_object(Bucket=s3_bucket, Key=s3_key)
        file_size = head_response['ContentLength']
        filename = os.path.basename(s3_key)

        print(f"File: {filename}")
        print(f"Size: {file_size:,} bytes ({file_size / (1024**3):.2f} GB)")
        print(f"{'='*60}\n")

    except Exception as e:
        print(f"[ERROR] Failed to get S3 file info: {e}")
        sys.exit(1)

    # Load or create state
    state = state_mgr.load()

    if state:
        print(f"[RESUME] Found existing upload state")

        # Validate before reporting progress: numbers from a state file that
        # belongs to another object would be meaningless.
        if state['s3_key'] != s3_key or state['file_size'] != file_size:
            print(f"[WARNING] State file is for different upload!")
            print(f"[WARNING] Removing old state and starting fresh...")
            state_mgr.delete()
            state = None
        else:
            # Guard the percentage against a zero-byte object.
            resumed_percent = state['uploaded_bytes'] * 100 / file_size if file_size else 100.0
            print(f"[RESUME] Progress: {state['uploaded_bytes']:,} / {file_size:,} bytes")
            print(f"[RESUME] ({resumed_percent:.1f}%)\n")

    if not state:
        # Create new upload
        print(f"[NEW] Starting new upload...\n")

        try:
            # Check if folder already exists
            print(f"[DRIVE] Checking for existing folder: {folder_name}")
            new_folder_id = uploader.find_folder(
                folder_name=folder_name,
                parent_folder_id=drive_folder_id
            )

            # Create folder only if it doesn't exist
            if not new_folder_id:
                print(f"[DRIVE] Creating new folder: {folder_name}")
                new_folder_id = uploader.create_folder(
                    folder_name=folder_name,
                    parent_folder_id=drive_folder_id
                )
                print(f"[DRIVE] Folder created successfully\n")
            else:
                print(f"[DRIVE] Using existing folder\n")

            upload_url = uploader.create_resumable_upload(
                filename=filename,
                folder_id=new_folder_id,
                file_size=file_size
            )
        except Exception as e:
            print(f"[ERROR] Failed to create upload session: {e}")
            sys.exit(1)

        state = state_mgr.create_initial_state(
            s3_bucket=s3_bucket,
            s3_key=s3_key,
            upload_url=upload_url,
            file_size=file_size,
            chunk_size=DriveUpload.CHUNK_SIZE
        )

    # Start upload
    upload_url = state['upload_url']
    chunk_size = state['chunk_size']
    uploaded_bytes = state['uploaded_bytes']
    total_chunks = state['total_chunks']

    print(f"[INFO] Chunk size: {chunk_size / mib:.0f} MB")
    print(f"[INFO] Total chunks: {total_chunks}")
    print(f"[INFO] Starting from offset: {uploaded_bytes:,} bytes\n")

    start_time = time.monotonic()
    # Bytes already on Drive before this run; excluded from speed/ETA so a
    # resumed transfer does not report an inflated rate.
    start_offset = uploaded_bytes

    # Stream and upload chunks
    chunk_index = uploaded_bytes // chunk_size
    offset = uploaded_bytes
    print(f"[INFO] offset: {offset} chunk_index: {chunk_index} file_size: {file_size} chunk_size: {chunk_size}")
    # NOTE(review): for a zero-byte object this loop never runs, so the
    # resumable session is never finalised - confirm whether empty files
    # can occur and need explicit handling.
    while offset < file_size:
        # Calculate chunk boundaries (end_byte is exclusive)
        end_byte = min(offset + chunk_size, file_size)
        actual_chunk_size = end_byte - offset
        byte_range = f'bytes={offset}-{end_byte-1}'

        print(f"[CHUNK {chunk_index + 1}/{total_chunks}] "
              f"Offset: {offset:,} - {end_byte:,} "
              f"({actual_chunk_size / mib:.1f} MB)")

        try:
            # Read chunk from S3
            print(f"  → Reading from S3...")
            response = s3_client.get_object(
                Bucket=s3_bucket,
                Key=s3_key,
                Range=byte_range
            )
            body = response['Body']
            try:
                chunk_data = body.read()
            finally:
                # Release the underlying HTTP connection even if read() fails.
                body.close()

            # Upload chunk to Drive
            print(f"  → Uploading to Drive... chunk_index: {chunk_index} offset: {offset}")
            success, new_offset = uploader.upload_chunk(
                chunk_data=chunk_data,
                offset=offset,
                file_size=file_size,
                upload_url=upload_url
            )

            print(f"  → New offset: {new_offset:,}")

            if not success:
                raise Exception("Chunk upload failed")

            # Update state
            offset = new_offset
            chunk_index += 1
            state_mgr.update_progress(offset, chunk_index - 1)

            # Progress (file_size > 0 is guaranteed inside this loop)
            percent = offset * 100.0 / file_size
            elapsed = time.monotonic() - start_time
            speed = (offset - start_offset) / elapsed if elapsed > 0 else 0
            eta = (file_size - offset) / speed if speed > 0 else 0

            print(f"  ✅ Progress: {percent:.1f}% | "
                  f"Speed: {speed / mib:.1f} MB/s | "
                  f"ETA: {eta / 60:.0f}min\n")

        except Exception as e:
            print(f"\n[ERROR] Failed to upload chunk: {e}")
            print(f"[INFO] State saved. You can resume by running the same command again.")
            sys.exit(1)

    # Upload complete!
    elapsed = time.monotonic() - start_time
    # elapsed can be 0 when nothing was left to transfer; avoid dividing by it.
    average_speed = (file_size - start_offset) / elapsed / mib if elapsed > 0 else 0.0
    print(f"\n{'='*60}")
    print(f"✅ UPLOAD COMPLETED SUCCESSFULLY!")
    print(f"{'='*60}")
    print(f"File: {filename}")
    print(f"Size: {file_size:,} bytes ({file_size / (1024**3):.2f} GB)")
    print(f"Time: {elapsed / 60:.1f} minutes")
    print(f"Average speed: {average_speed:.1f} MB/s")
    print(f"{'='*60}\n")

    # Clean up state
    state_mgr.delete()
    print(f"[INFO] State file cleaned. Ready for next upload!")
|
|
208
|
+
|
|
209
|
+
#!/usr/bin/env python3
|
|
210
|
+
"""
|
|
211
|
+
Drive Uploader - Handles Google Drive resumable upload with 429 retry logic
|
|
212
|
+
"""
|
|
213
|
+
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import boto3
|
|
3
|
+
from app.drive_uploader import stream_s3_to_drive
|
|
4
|
+
from app.db import SessionLocal
|
|
5
|
+
from app.models import FileGenerateHistory, Nursery
|
|
6
|
+
from app.config import S3_BUCKET, DRIVE_FOLDER_ID, SQS_QUEUE_URL, AWS_REGION
|
|
7
|
+
|
|
8
|
+
def main(file_generate_history_id, receipt_handle):
    """Transfer the file of one FileGenerateHistory row from S3 to Google Drive.

    Args:
        file_generate_history_id (int): Primary key of the FileGenerateHistory
            row describing the file to transfer.
        receipt_handle (str): SQS receipt handle of the triggering message;
            passed to ``after_transfer_file`` so the message can be deleted.

    Side effects:
        - Uploads the S3 object to the configured Drive folder.
        - Calls ``after_transfer_file`` on success, on KeyboardInterrupt, on
          any other ``Exception`` and when a required DB row is missing.
        - Exits the process with status 1 on every failure path.

    Flow:
        1. Load the FileGenerateHistory row and its Nursery, then build
           - S3 key      : "{nursery_id}/batch_downloads/{s3_id}"
           - folder name : "{nursery_id}_{sanitized nursery name}"
        2. Run ``stream_s3_to_drive``.
        3. On success print "🎉 DONE" and delete the SQS message.
    """
    db = SessionLocal()
    try:
        file_history = (
            db.query(FileGenerateHistory)
            .filter(FileGenerateHistory.id == file_generate_history_id)
            .first()
        )
        nursery = None
        if file_history is not None:
            nursery = (
                db.query(Nursery)
                .filter(Nursery.id == file_history.nursery_id)
                .first()
            )

        # .first() returns None for an unknown id; fail with a clear message
        # instead of an AttributeError on the attribute access below.
        if file_history is None or nursery is None:
            missing = "FileGenerateHistory" if file_history is None else "Nursery"
            print(f"\n[FATAL ERROR] {missing} record not found "
                  f"(file_generate_history_id={file_generate_history_id})")
            after_transfer_file(receipt_handle)
            sys.exit(1)

        # Read everything needed from the ORM objects while the session is open.
        s3_key = f"{file_history.nursery_id}/batch_downloads/{file_history.s3_id}"
        folder_name = f"{nursery.id}_{sanitize_filename(nursery.name)}"
    finally:
        # Always return the connection to the pool, including on sys.exit().
        db.close()

    print(f"s3_key : {s3_key}")

    # NOTE(review): stream_s3_to_drive signals its own failures with
    # sys.exit(1); SystemExit is not an Exception subclass, so on those paths
    # neither handler below runs and the SQS message is left in the queue.
    # Confirm whether that is the intended retry mechanism.
    try:
        stream_s3_to_drive(
            s3_bucket=S3_BUCKET,
            s3_key=s3_key,
            drive_folder_id=DRIVE_FOLDER_ID,
            folder_name=folder_name
        )
    except KeyboardInterrupt:
        print("\n\n[INTERRUPTED] Upload interrupted by user")
        print("[INFO] State saved. Resume by running the same command again.")
        after_transfer_file(receipt_handle)
        sys.exit(1)
    except Exception as e:
        print(f"\n[FATAL ERROR] {e}")
        import traceback
        traceback.print_exc()
        after_transfer_file(receipt_handle)
        sys.exit(1)

    print("🎉 DONE")
    after_transfer_file(receipt_handle)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def after_transfer_file(receipt_handle):
    """
    Clean-up step run after a transfer: delete the job's message from SQS.

    [Input]
    - receipt_handle (str) : receipt handle of the SQS message.
                             When it is None or an empty string the deletion
                             is skipped.

    [Output / side effects]
    - Deletes the message from the queue identified by SQS_QUEUE_URL.
    - When SQS_QUEUE_URL is not configured, logs a warning and skips.

    [Flow]
    1. Skip when receipt_handle is missing.
    2. Skip when SQS_QUEUE_URL is not set.
    3. Create a boto3 SQS client and call delete_message.
    4. On success, print a "[SUCCESS]" line.
    5. On failure, print an "[ERROR]" line plus the traceback; the exception
       is not re-raised, so the caller keeps running.
    """
    if not receipt_handle:
        print("[WARNING] No receipt_handle provided, skipping SQS message deletion")
        return

    if not SQS_QUEUE_URL:
        print("[WARNING] SQS_QUEUE_URL not configured, skipping SQS message deletion")
        return

    delete_request = {
        "QueueUrl": SQS_QUEUE_URL,
        "ReceiptHandle": receipt_handle,
    }

    try:
        client = boto3.client('sqs', region_name=AWS_REGION)
        client.delete_message(**delete_request)
        print("[SUCCESS] Message deleted from SQS queue")
    except Exception as exc:
        # Best effort: a failed deletion is reported but never propagated.
        print(f"[ERROR] Failed to delete message from SQS: {exc}")
        import traceback
        traceback.print_exc()
|
|
106
|
+
|
|
107
|
+
def sanitize_filename(filename):
    """Return *filename* with path separators replaced by underscores.

    Both "/" and "\\" are mapped to "_" so the result can be used as a single
    folder or file name component.

    Args:
        filename: The name to clean. ``None`` is treated as an empty name, and
            non-string values are converted with ``str()``.

    Returns:
        The cleaned name as a ``str`` (``""`` when *filename* is ``None``).
    """
    # A missing name must not raise here: the caller builds the folder name
    # before entering its try/except, so an exception at this point would
    # bypass its error handling entirely.
    if filename is None:
        return ""
    # One translate pass instead of chained replace() calls.
    return str(filename).translate(str.maketrans("/\\", "__"))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
if __name__ == "__main__":
    # Command-line entry point:
    #   argv[1] : id of the FileGenerateHistory row to transfer (required)
    #   argv[2] : SQS receipt handle (optional)
    if len(sys.argv) < 2:
        print("Usage: python -m app.main <file_generate_history_id> [receipt_handle]")
        sys.exit(1)
    file_generate_history_id = int(sys.argv[1])
    # The receipt handle may be omitted; after_transfer_file() skips the SQS
    # deletion when it receives None. Reading sys.argv[2] unconditionally
    # raised IndexError when only the id was supplied.
    receipt_handle = sys.argv[2] if len(sys.argv) > 2 else None
    main(file_generate_history_id, receipt_handle)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
|
2
|
+
from sqlalchemy import BigInteger, String, DateTime, ForeignKey
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Base(DeclarativeBase):
    """Declarative base class shared by the ORM models in this module."""
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Nursery(Base):
    """ORM model mapped to the ``nursery`` table."""

    __tablename__ = "nursery"

    # Primary key.
    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Nursery name, stored in a 255-character string column.
    name: Mapped[str] = mapped_column(String(255))

    # Paired with FileGenerateHistory.nursery through back_populates.
    file_generate_histories = relationship(
        "FileGenerateHistory", back_populates="nursery", lazy="select"
    )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class FileGenerateHistory(Base):
    """ORM model mapped to the ``file_generate_histories`` table."""

    __tablename__ = "file_generate_histories"

    # Primary key.
    id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
    # Unique, indexed identifier stored in a 512-character string column.
    s3_id: Mapped[str] = mapped_column(String(512), unique=True, index=True)

    # Indexed foreign key to nursery.id.
    nursery_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey("nursery.id"), index=True
    )

    # Paired with Nursery.file_generate_histories through back_populates.
    nursery = relationship("Nursery", back_populates="file_generate_histories")
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
State Manager - Handles persistence of upload state to enable resume functionality
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
from datetime import datetime
|
|
9
|
+
from typing import Dict, Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class StateManager:
    """Persist upload progress to a JSON file so an upload can be resumed.

    The state is a plain dict. ``self.state`` holds the last state that was
    successfully loaded or saved, or ``None`` when there is none.
    """

    def __init__(self, state_file: str = 'state.json'):
        """
        Args:
            state_file: Path of the file the state is stored in
                (default: 'state.json').
        """
        self.state_file = state_file
        self.state: Optional[Dict] = None

    def load(self) -> Optional[Dict]:
        """Read and validate the state file.

        Returns:
            The state dict when the file exists, parses as a JSON object and
            contains every required field (upload_url, file_size,
            uploaded_bytes, s3_key); otherwise ``None``.

        ``self.state`` is only replaced when validation succeeds, so a
        rejected file can never be picked up later by ``update_progress``.
        """
        try:
            with open(self.state_file, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except FileNotFoundError:
            # No previous run to resume.
            return None
        except (ValueError, OSError) as e:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"[WARNING] Failed to load state: {e}. Starting fresh...")
            return None

        # Validate state
        required_fields = ['upload_url', 'file_size', 'uploaded_bytes', 's3_key']
        if not isinstance(loaded, dict) or not all(
            field in loaded for field in required_fields
        ):
            print("[WARNING] Invalid state file, missing fields. Starting fresh...")
            return None

        self.state = loaded
        print(f"[STATE] Loaded: {self.state['uploaded_bytes']:,} / {self.state['file_size']:,} bytes")
        return self.state

    def save(self, state: Dict) -> None:
        """Write *state* to the state file atomically.

        Args:
            state: State dict to persist. It is stored as ``self.state`` and a
                ``last_update`` timestamp is added to it in place.

        Raises:
            OSError: When the file cannot be written; the error is logged and
                re-raised.

        The JSON is written to ``<state_file>.tmp`` and then moved over the
        real file with ``os.replace``. That is a single rename, so the state
        file is always either the previous or the new complete version.
        Removing the old file first would leave a window with no state file.
        """
        self.state = state
        # NOTE: datetime.utcnow() is deprecated since Python 3.12; it is kept
        # so the stored timestamp format (naive ISO 8601) does not change.
        self.state['last_update'] = datetime.utcnow().isoformat()

        temp_file = f"{self.state_file}.tmp"
        try:
            with open(temp_file, 'w', encoding='utf-8') as f:
                json.dump(self.state, f, indent=2, ensure_ascii=False)

            # Atomic rename (overwrites an existing state file).
            os.replace(temp_file, self.state_file)

        except OSError as e:
            print(f"[ERROR] Failed to save state: {e}")
            raise

    def delete(self) -> None:
        """Remove the state file (clean-up once an upload has finished).

        Does nothing when the file does not exist.
        """
        try:
            os.remove(self.state_file)
        except FileNotFoundError:
            return
        print(f"[STATE] Deleted {self.state_file}")

    def create_initial_state(self, s3_bucket: str, s3_key: str,
                             upload_url: str, file_size: int,
                             chunk_size: int) -> Dict:
        """Build the state for a new upload, save it and return it.

        Args:
            s3_bucket: Source S3 bucket name.
            s3_key: Source S3 object key.
            upload_url: Google Drive resumable upload URL.
            file_size: Total size of the file in bytes.
            chunk_size: Number of bytes per chunk; must be positive.

        Returns:
            The initial state dict (uploaded_bytes=0, last_chunk_uploaded=-1,
            total_chunks=ceil(file_size / chunk_size), timestamps).

        Raises:
            ValueError: When *chunk_size* is not positive.
            OSError: When the state file cannot be written.
        """
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")

        print(f"[DRIVE] Upload URL: {upload_url}")
        state = {
            's3_bucket': s3_bucket,
            's3_key': s3_key,
            'upload_url': upload_url,
            'file_size': file_size,
            'uploaded_bytes': 0,
            'chunk_size': chunk_size,
            # Ceiling division without floats.
            'total_chunks': (file_size + chunk_size - 1) // chunk_size,
            'last_chunk_uploaded': -1,
            'started_at': datetime.utcnow().isoformat(),
            'last_update': datetime.utcnow().isoformat()
        }

        self.save(state)
        return state

    def update_progress(self, uploaded_bytes: int, chunk_index: int) -> None:
        """Record upload progress and persist it.

        Args:
            uploaded_bytes: Cumulative number of bytes uploaded so far.
            chunk_index: Zero-based index of the last uploaded chunk.

        Raises:
            ValueError: When no state has been loaded or created yet.
            OSError: When the state file cannot be written.
        """
        if self.state is None:
            raise ValueError("State not initialized")

        self.state['uploaded_bytes'] = uploaded_bytes
        self.state['last_chunk_uploaded'] = chunk_index
        self.save(self.state)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# version: "3.8"
|
|
2
|
+
|
|
3
|
+
services:
|
|
4
|
+
app:
|
|
5
|
+
build: .
|
|
6
|
+
volumes:
|
|
7
|
+
- ./service_account.json:/app/service_account.json
|
|
8
|
+
env_file:
|
|
9
|
+
- .env
|
|
10
|
+
depends_on:
|
|
11
|
+
- db
|
|
12
|
+
|
|
13
|
+
db:
|
|
14
|
+
image: postgres:15
|
|
15
|
+
environment:
|
|
16
|
+
POSTGRES_DB: uploader
|
|
17
|
+
POSTGRES_USER: postgres
|
|
18
|
+
POSTGRES_PASSWORD: postgres
|
|
19
|
+
ports:
|
|
20
|
+
- "5433:5432"
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# --- Database ---
|
|
2
|
+
SQLAlchemy==2.0.36
|
|
3
|
+
PyMySQL==1.1.1
|
|
4
|
+
mysql-connector-python==8.0.33
|
|
5
|
+
psycopg2-binary==2.9.9
|
|
6
|
+
|
|
7
|
+
# --- AWS S3 ---
|
|
8
|
+
boto3==1.26.162
|
|
9
|
+
botocore==1.29.162
|
|
10
|
+
|
|
11
|
+
# --- Google Drive API ---
|
|
12
|
+
google-api-python-client==2.149.0
|
|
13
|
+
google-auth==2.35.0
|
|
14
|
+
google-auth-httplib2==0.3.0
|
|
15
|
+
google-auth-oauthlib==1.2.1
|
|
16
|
+
google-resumable-media>=2.0.0
|
|
17
|
+
|
|
18
|
+
# --- Utils ---
|
|
19
|
+
python-dotenv==1.0.1
|
|
20
|
+
requests==2.32.3
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "service_account",
|
|
3
|
+
"project_id": "kidsly-project-7df50",
|
|
4
|
+
"private_key_id": "4edb01f1b83b1c470c6d92c19100128eedccfa4e",
|
|
5
|
+
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCnlZJfESLR++FS\nr/QmNFSEadTpiYlKfMZ/eJmfeC1Ygybw/CPcExphXOG3+3KqKIL03jopml7tYUOl\n/QhWd7vr1KBHG4DvmWBx5YYIAQ9R+l1tLGUCZUAxquY9vaWCkAcBEEXqJd/UFhka\nb7LNmRSthQmU82EKv5ze5QXXzNYdQsUHq4tw4nyeoMzbz8QIVRo0ebuGoBvBBLzM\nw5HNxdGH6EjALyR9ODvUjdEFwMCGCCEh3psqFtHteFwlIs++BUNeL93peDMYwAYL\nnHmdREVGUlwdmV3Lz9MqTpgEsZ3YBDSThGgyW+qPlDQmNcmsaTqh6jSVyNqplgw7\n1eDe2SKpAgMBAAECggEAUj9l1ZOe78j7lq31mRSA0HXygWW3hF/FugUR/IKqWrEP\nmSFeZiHYBiXkP/xAFAQK9avlhsqVQVLFtT8A6xMbn0w9F3kHqFAx1eRsihwfDUBf\nvveVYX4Y6edy1aVwiCowzdRTQ+4vsrLfwipE6oPxcpIDGF/0ZiaRkpF7cyS8hyIV\nGPPigo6SdrvqFGAbaavdmpyZsdnZZ3BBd9MnDuZfGQwE8tlXKhlWL6LDcbo1Gq2h\nf/SArv6TDasaCTrbKuQQF5OzASxLCC5s2J28PrYcmzFIew/klnc2NpWHUTbj3R0Y\nwriy8BneL3PeqSRsleuN8pPWLVIj0ZHmjUvdC5jCkwKBgQDUjQeJ9koFGW1HDMSM\n1JLYpKTNI3gkYHkpsjczdOjTRC2BrUiShOCyP4kvrQRXDttTAqQAKT5fjH0mRtPV\nt6YZndM4A++PB4dQXEus/XjUA5gkARcrknp2So5+hrbBeZ4gtHqwtI+zhlCrvDC/\nHO37adQ6pEhp4/mIVPBssqZiKwKBgQDJ12ZjB6ZZ8SGj3jA/OQR+T0s8kNpU1JZg\nN3Z0QhCkywyNpGHqf92Bp7LOZityUB4v18vOUtFrJWT1Cfweki/o/sRLaSmn0PLB\n0Gu41p0xkdNgYKrOZO9WfAh24MtXKVb9/0TCjVjq2m6eVFKSp3P9Q9vqsUgEd40I\nqnaxRFPoewKBgQDHNvuW6jkMOH28txHNcTvFQ+OWxaTIa4eEdh3Y472IrgD85t37\nPgGf7370KXTE6A7W6cWuxPlJ8tIluwK27+8FWXBrsvyoEQVZY3gmRlgmVswVCtSd\nqSeQkWam3jqSchusSZwo3MNHHx+jJrqFlWbUWqdo4ytf3CW4Cuxg5LyfiwKBgQC/\nb0qutjyypZ8wWrYL4LpASlijcweA2bhePXxdu6MpiWO4m6Y60Vd3d2SBmimu3Aol\neljUsXvqUac1OWkkXaaPJ8Npeq6FegokhNcGkpa4wwRmLokSBG1z62PjzEVPv+fA\nM/I1S/Z/mPi9tmAytBuwpEHakJYrlm/vPi4g5xtmLQKBgQCpSA3Z2DOgfev2527A\nNrKTQUFwPAItpRk7UGJQAOOZw0triVwB9Anwy84zB8yzG73D46Gxougl3ANKC1nT\n2EmyBLQ1u8M6hvO43bCwh0IsUAthbn92mGMZqvyhRSdCDbvR0MWrg949He/vvyVy\nz/aJmMzZ8ystLILMvvqk1nu4OA==\n-----END PRIVATE KEY-----\n",
|
|
6
|
+
"client_email": "transfer-file-from-s3-to-drive@kidsly-project-7df50.iam.gserviceaccount.com",
|
|
7
|
+
"client_id": "110421830388586121859",
|
|
8
|
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
|
9
|
+
"token_uri": "https://oauth2.googleapis.com/token",
|
|
10
|
+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
|
11
|
+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/transfer-file-from-s3-to-drive%40kidsly-project-7df50.iam.gserviceaccount.com",
|
|
12
|
+
"universe_domain": "googleapis.com"
|
|
13
|
+
}
|
data/lib/s3_to_drive.rb
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "s3_to_drive/version"
|
|
4
|
+
require_relative "s3_to_drive/client/configuration"
|
|
5
|
+
require_relative "s3_to_drive/client/client"
|
|
6
|
+
|
|
7
|
+
# Top-level namespace of the gem.
#
# Module-level calls that the module itself does not define are forwarded to a
# freshly built S3ToDrive::Client::Client, so `S3ToDrive.some_call(...)` behaves
# like `S3ToDrive.new.some_call(...)`.
module S3ToDrive
  # NOTE(review): Configuration is presumably defined by the
  # "s3_to_drive/client/configuration" file required above - confirm.
  extend Configuration

  # Build a new client instance.
  #
  # @return [S3ToDrive::Client::Client]
  def self.new
    S3ToDrive::Client::Client.new
  end

  # Forward unknown module-level calls to a client instance.
  #
  # A single client is built per call and used for both the capability check
  # and the dispatch.
  #
  # @raise [NoMethodError] when the client does not respond to +method_name+
  def self.method_missing(method_name, *args, &block)
    client = new
    return super unless client.respond_to?(method_name)

    client.public_send(method_name, *args, &block)
  end

  # Report delegated methods through the standard hook so that both
  # `respond_to?` and `method` see them; `respond_to?` itself is left alone.
  def self.respond_to_missing?(method_name, include_private = false)
    new.respond_to?(method_name, include_private) || super
  end
end
|
data/sig/s3_to_drive.rbs
ADDED
metadata
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: s3_to_drive
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- mirabo-hoang-lx
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2026-03-19 00:00:00.000000000 Z
|
|
12
|
+
dependencies: []
|
|
13
|
+
description: A simple gem to transfer files from AWS S3 to Google Drive.
|
|
14
|
+
email:
|
|
15
|
+
- hoanglx@migrabo-global.com
|
|
16
|
+
executables: []
|
|
17
|
+
extensions: []
|
|
18
|
+
extra_rdoc_files: []
|
|
19
|
+
files:
|
|
20
|
+
- ".rubocop.yml"
|
|
21
|
+
- LICENSE.txt
|
|
22
|
+
- README.md
|
|
23
|
+
- Rakefile
|
|
24
|
+
- lib/concerns/transfer_s3_to_drive/.env
|
|
25
|
+
- lib/concerns/transfer_s3_to_drive/Dockerfile
|
|
26
|
+
- 'lib/concerns/transfer_s3_to_drive/app/__init__.py '
|
|
27
|
+
- lib/concerns/transfer_s3_to_drive/app/__pycache__/main.cpython-312.pyc
|
|
28
|
+
- lib/concerns/transfer_s3_to_drive/app/config.py
|
|
29
|
+
- lib/concerns/transfer_s3_to_drive/app/db.py
|
|
30
|
+
- lib/concerns/transfer_s3_to_drive/app/drive_upload.py
|
|
31
|
+
- lib/concerns/transfer_s3_to_drive/app/drive_uploader.py
|
|
32
|
+
- lib/concerns/transfer_s3_to_drive/app/kidsly-admin.code-workspace
|
|
33
|
+
- lib/concerns/transfer_s3_to_drive/app/main.py
|
|
34
|
+
- lib/concerns/transfer_s3_to_drive/app/models.py
|
|
35
|
+
- lib/concerns/transfer_s3_to_drive/app/state_manager.py
|
|
36
|
+
- lib/concerns/transfer_s3_to_drive/docker-compose.yml
|
|
37
|
+
- lib/concerns/transfer_s3_to_drive/requirements.txt
|
|
38
|
+
- lib/concerns/transfer_s3_to_drive/service_account.json
|
|
39
|
+
- lib/s3_to_drive.rb
|
|
40
|
+
- lib/s3_to_drive/version.rb
|
|
41
|
+
- sig/s3_to_drive.rbs
|
|
42
|
+
homepage: https://github.com/XuanHoang1302/s3_to_drive.git
|
|
43
|
+
licenses:
|
|
44
|
+
- MIT
|
|
45
|
+
metadata:
|
|
46
|
+
homepage_uri: https://github.com/XuanHoang1302/s3_to_drive.git
|
|
47
|
+
source_code_uri: https://github.com/XuanHoang1302/s3_to_drive.git
|
|
48
|
+
changelog_uri: https://github.com/XuanHoang1302/s3_to_drive/blob/main/CHANGELOG.md
|
|
49
|
+
post_install_message:
|
|
50
|
+
rdoc_options: []
|
|
51
|
+
require_paths:
|
|
52
|
+
- lib
|
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
54
|
+
requirements:
|
|
55
|
+
- - ">="
|
|
56
|
+
- !ruby/object:Gem::Version
|
|
57
|
+
version: 2.5.0
|
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
59
|
+
requirements:
|
|
60
|
+
- - ">="
|
|
61
|
+
- !ruby/object:Gem::Version
|
|
62
|
+
version: '0'
|
|
63
|
+
requirements: []
|
|
64
|
+
rubygems_version: 3.4.10
|
|
65
|
+
signing_key:
|
|
66
|
+
specification_version: 4
|
|
67
|
+
summary: My custom gem s3 to drive.
|
|
68
|
+
test_files: []
|