aimodelshare 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aimodelshare/README.md +26 -0
- aimodelshare/__init__.py +100 -0
- aimodelshare/aimsonnx.py +2381 -0
- aimodelshare/api.py +836 -0
- aimodelshare/auth.py +163 -0
- aimodelshare/aws.py +511 -0
- aimodelshare/aws_client.py +173 -0
- aimodelshare/base_image.py +154 -0
- aimodelshare/bucketpolicy.py +106 -0
- aimodelshare/color_mappings/color_mapping_keras.csv +121 -0
- aimodelshare/color_mappings/color_mapping_pytorch.csv +117 -0
- aimodelshare/containerisation.py +244 -0
- aimodelshare/containerization.py +712 -0
- aimodelshare/containerization_templates/Dockerfile.txt +8 -0
- aimodelshare/containerization_templates/Dockerfile_PySpark.txt +23 -0
- aimodelshare/containerization_templates/buildspec.txt +14 -0
- aimodelshare/containerization_templates/lambda_function.txt +40 -0
- aimodelshare/custom_approach/__init__.py +1 -0
- aimodelshare/custom_approach/lambda_function.py +17 -0
- aimodelshare/custom_eval_metrics.py +103 -0
- aimodelshare/data_sharing/__init__.py +0 -0
- aimodelshare/data_sharing/data_sharing_templates/Dockerfile.txt +3 -0
- aimodelshare/data_sharing/data_sharing_templates/__init__.py +1 -0
- aimodelshare/data_sharing/data_sharing_templates/buildspec.txt +15 -0
- aimodelshare/data_sharing/data_sharing_templates/codebuild_policies.txt +129 -0
- aimodelshare/data_sharing/data_sharing_templates/codebuild_trust_relationship.txt +12 -0
- aimodelshare/data_sharing/download_data.py +620 -0
- aimodelshare/data_sharing/share_data.py +373 -0
- aimodelshare/data_sharing/utils.py +8 -0
- aimodelshare/deploy_custom_lambda.py +246 -0
- aimodelshare/documentation/Makefile +20 -0
- aimodelshare/documentation/karma_sphinx_theme/__init__.py +28 -0
- aimodelshare/documentation/karma_sphinx_theme/_version.py +2 -0
- aimodelshare/documentation/karma_sphinx_theme/breadcrumbs.html +70 -0
- aimodelshare/documentation/karma_sphinx_theme/layout.html +172 -0
- aimodelshare/documentation/karma_sphinx_theme/search.html +50 -0
- aimodelshare/documentation/karma_sphinx_theme/searchbox.html +14 -0
- aimodelshare/documentation/karma_sphinx_theme/static/css/custom.css +2 -0
- aimodelshare/documentation/karma_sphinx_theme/static/css/custom.css.map +1 -0
- aimodelshare/documentation/karma_sphinx_theme/static/css/theme.css +2751 -0
- aimodelshare/documentation/karma_sphinx_theme/static/css/theme.css.map +1 -0
- aimodelshare/documentation/karma_sphinx_theme/static/css/theme.min.css +2 -0
- aimodelshare/documentation/karma_sphinx_theme/static/css/theme.min.css.map +1 -0
- aimodelshare/documentation/karma_sphinx_theme/static/font/fontello.eot +0 -0
- aimodelshare/documentation/karma_sphinx_theme/static/font/fontello.svg +32 -0
- aimodelshare/documentation/karma_sphinx_theme/static/font/fontello.ttf +0 -0
- aimodelshare/documentation/karma_sphinx_theme/static/font/fontello.woff +0 -0
- aimodelshare/documentation/karma_sphinx_theme/static/font/fontello.woff2 +0 -0
- aimodelshare/documentation/karma_sphinx_theme/static/js/theme.js +68 -0
- aimodelshare/documentation/karma_sphinx_theme/theme.conf +9 -0
- aimodelshare/documentation/make.bat +35 -0
- aimodelshare/documentation/requirements.txt +2 -0
- aimodelshare/documentation/source/about.rst +18 -0
- aimodelshare/documentation/source/advanced_features.rst +137 -0
- aimodelshare/documentation/source/competition.rst +218 -0
- aimodelshare/documentation/source/conf.py +58 -0
- aimodelshare/documentation/source/create_credentials.rst +86 -0
- aimodelshare/documentation/source/example_notebooks.rst +132 -0
- aimodelshare/documentation/source/functions.rst +151 -0
- aimodelshare/documentation/source/gettingstarted.rst +390 -0
- aimodelshare/documentation/source/images/creds1.png +0 -0
- aimodelshare/documentation/source/images/creds2.png +0 -0
- aimodelshare/documentation/source/images/creds3.png +0 -0
- aimodelshare/documentation/source/images/creds4.png +0 -0
- aimodelshare/documentation/source/images/creds5.png +0 -0
- aimodelshare/documentation/source/images/creds_file_example.png +0 -0
- aimodelshare/documentation/source/images/predict_tab.png +0 -0
- aimodelshare/documentation/source/index.rst +110 -0
- aimodelshare/documentation/source/modelplayground.rst +132 -0
- aimodelshare/exceptions.py +11 -0
- aimodelshare/generatemodelapi.py +1270 -0
- aimodelshare/iam/codebuild_policy.txt +129 -0
- aimodelshare/iam/codebuild_trust_relationship.txt +12 -0
- aimodelshare/iam/lambda_policy.txt +15 -0
- aimodelshare/iam/lambda_trust_relationship.txt +12 -0
- aimodelshare/json_templates/__init__.py +1 -0
- aimodelshare/json_templates/api_json.txt +155 -0
- aimodelshare/json_templates/auth/policy.txt +1 -0
- aimodelshare/json_templates/auth/role.txt +1 -0
- aimodelshare/json_templates/eval/policy.txt +1 -0
- aimodelshare/json_templates/eval/role.txt +1 -0
- aimodelshare/json_templates/function/policy.txt +1 -0
- aimodelshare/json_templates/function/role.txt +1 -0
- aimodelshare/json_templates/integration_response.txt +5 -0
- aimodelshare/json_templates/lambda_policy_1.txt +15 -0
- aimodelshare/json_templates/lambda_policy_2.txt +8 -0
- aimodelshare/json_templates/lambda_role_1.txt +12 -0
- aimodelshare/json_templates/lambda_role_2.txt +16 -0
- aimodelshare/leaderboard.py +174 -0
- aimodelshare/main/1.txt +132 -0
- aimodelshare/main/1B.txt +112 -0
- aimodelshare/main/2.txt +153 -0
- aimodelshare/main/3.txt +134 -0
- aimodelshare/main/4.txt +128 -0
- aimodelshare/main/5.txt +109 -0
- aimodelshare/main/6.txt +105 -0
- aimodelshare/main/7.txt +144 -0
- aimodelshare/main/8.txt +142 -0
- aimodelshare/main/__init__.py +1 -0
- aimodelshare/main/authorization.txt +275 -0
- aimodelshare/main/eval_classification.txt +79 -0
- aimodelshare/main/eval_lambda.txt +1709 -0
- aimodelshare/main/eval_regression.txt +80 -0
- aimodelshare/main/lambda_function.txt +8 -0
- aimodelshare/main/nst.txt +149 -0
- aimodelshare/model.py +1543 -0
- aimodelshare/modeluser.py +215 -0
- aimodelshare/moral_compass/README.md +408 -0
- aimodelshare/moral_compass/__init__.py +65 -0
- aimodelshare/moral_compass/_version.py +3 -0
- aimodelshare/moral_compass/api_client.py +601 -0
- aimodelshare/moral_compass/apps/__init__.py +69 -0
- aimodelshare/moral_compass/apps/ai_consequences.py +540 -0
- aimodelshare/moral_compass/apps/bias_detective.py +714 -0
- aimodelshare/moral_compass/apps/ethical_revelation.py +898 -0
- aimodelshare/moral_compass/apps/fairness_fixer.py +889 -0
- aimodelshare/moral_compass/apps/judge.py +888 -0
- aimodelshare/moral_compass/apps/justice_equity_upgrade.py +853 -0
- aimodelshare/moral_compass/apps/mc_integration_helpers.py +820 -0
- aimodelshare/moral_compass/apps/model_building_game.py +1104 -0
- aimodelshare/moral_compass/apps/model_building_game_beginner.py +687 -0
- aimodelshare/moral_compass/apps/moral_compass_challenge.py +858 -0
- aimodelshare/moral_compass/apps/session_auth.py +254 -0
- aimodelshare/moral_compass/apps/shared_activity_styles.css +349 -0
- aimodelshare/moral_compass/apps/tutorial.py +481 -0
- aimodelshare/moral_compass/apps/what_is_ai.py +853 -0
- aimodelshare/moral_compass/challenge.py +365 -0
- aimodelshare/moral_compass/config.py +187 -0
- aimodelshare/placeholders/model.onnx +0 -0
- aimodelshare/placeholders/preprocessor.zip +0 -0
- aimodelshare/playground.py +1968 -0
- aimodelshare/postprocessormodules.py +157 -0
- aimodelshare/preprocessormodules.py +373 -0
- aimodelshare/pyspark/1.txt +195 -0
- aimodelshare/pyspark/1B.txt +181 -0
- aimodelshare/pyspark/2.txt +220 -0
- aimodelshare/pyspark/3.txt +204 -0
- aimodelshare/pyspark/4.txt +187 -0
- aimodelshare/pyspark/5.txt +178 -0
- aimodelshare/pyspark/6.txt +174 -0
- aimodelshare/pyspark/7.txt +211 -0
- aimodelshare/pyspark/8.txt +206 -0
- aimodelshare/pyspark/__init__.py +1 -0
- aimodelshare/pyspark/authorization.txt +258 -0
- aimodelshare/pyspark/eval_classification.txt +79 -0
- aimodelshare/pyspark/eval_lambda.txt +1441 -0
- aimodelshare/pyspark/eval_regression.txt +80 -0
- aimodelshare/pyspark/lambda_function.txt +8 -0
- aimodelshare/pyspark/nst.txt +213 -0
- aimodelshare/python/my_preprocessor.py +58 -0
- aimodelshare/readme.md +26 -0
- aimodelshare/reproducibility.py +181 -0
- aimodelshare/sam/Dockerfile.txt +8 -0
- aimodelshare/sam/Dockerfile_PySpark.txt +24 -0
- aimodelshare/sam/__init__.py +1 -0
- aimodelshare/sam/buildspec.txt +11 -0
- aimodelshare/sam/codebuild_policies.txt +129 -0
- aimodelshare/sam/codebuild_trust_relationship.txt +12 -0
- aimodelshare/sam/codepipeline_policies.txt +173 -0
- aimodelshare/sam/codepipeline_trust_relationship.txt +12 -0
- aimodelshare/sam/spark-class.txt +2 -0
- aimodelshare/sam/template.txt +54 -0
- aimodelshare/tools.py +103 -0
- aimodelshare/utils/__init__.py +78 -0
- aimodelshare/utils/optional_deps.py +38 -0
- aimodelshare/utils.py +57 -0
- aimodelshare-0.3.7.dist-info/METADATA +298 -0
- aimodelshare-0.3.7.dist-info/RECORD +171 -0
- aimodelshare-0.3.7.dist-info/WHEEL +5 -0
- aimodelshare-0.3.7.dist-info/licenses/LICENSE +5 -0
- aimodelshare-0.3.7.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import gzip
|
|
4
|
+
from io import BytesIO
|
|
5
|
+
import json
|
|
6
|
+
import shutil
|
|
7
|
+
import requests
|
|
8
|
+
import tempfile
|
|
9
|
+
import tarfile
|
|
10
|
+
import urllib3
|
|
11
|
+
import re
|
|
12
|
+
from aimodelshare.data_sharing.utils import redo_with_write
|
|
13
|
+
|
|
14
|
+
# Silence urllib3 warnings for the whole process at import time. The registry
# requests in this module pass verify=False, which is presumably what would
# otherwise trigger a warning on every call.
urllib3.disable_warnings()
|
|
15
|
+
|
|
16
|
+
def get_auth_head_no_aws_auth(auth_url, registry, repository, type):
    """Request a pull token and build the headers for registry requests.

    Args:
        auth_url: Token endpoint URL; ``service`` and ``scope`` are appended
            as a query string.
        registry: Registry host, sent as the ``service`` query value.
        repository: Repository path for which a ``pull`` scope is requested.
        type: Value placed in the ``Accept`` header. The name shadows the
            builtin but is kept so keyword callers keep working.

    Returns:
        dict with an ``Authorization`` bearer entry and an ``Accept`` entry.

    Raises:
        RuntimeError: if the token endpoint answers with an error status or
            its JSON body carries no ``token`` field.
    """
    # NOTE(review): verify=False turns off TLS certificate verification for
    # this request; kept as in the original, but worth confirming it is needed.
    resp = requests.get(
        '{}?service={}&scope=repository:{}:pull'.format(auth_url, registry, repository),
        verify=False
    )
    # Fail with a readable message instead of a KeyError/JSON error further down.
    if not resp.ok:
        raise RuntimeError(
            f"Failed to fetch auth token for {repository} (status {resp.status_code}): {resp.text[:300]}"
        )
    payload = resp.json()
    access_token = payload.get('token') if isinstance(payload, dict) else None
    if not access_token:
        raise RuntimeError(
            f"Auth response for {repository} did not contain a token: {resp.text[:300]}"
        )
    return {'Authorization': 'Bearer ' + access_token, 'Accept': type}
|
|
21
|
+
|
|
22
|
+
def progress_bar(layer_label, nb_traits):
    """Redraw the one-line download bar on stdout.

    The line starts with a carriage return so it overwrites the previous
    draw. ``nb_traits`` is the number of filled cells: all but the last are
    ``=`` and the last is ``>``; the rest of the bar is padded with spaces
    up to 49 cells.
    """
    # Filled part: '=' for every cell except the head, which is '>'.
    filled = '=' * (nb_traits - 1) + '>' if nb_traits > 0 else ''
    # A negative repeat count yields '', so an over-full bar gets no padding.
    padding = ' ' * (49 - nb_traits)
    sys.stdout.write(''.join(('\r', layer_label, 'Downloading [', filled, padding, ']')))
    sys.stdout.flush()
|
|
33
|
+
|
|
34
|
+
def get_auth_url(registry):  # to do with auth
    """Return the token endpoint URL for ``registry`` (no AWS auth)."""
    return ''.join(('https://', registry, '/token/'))
|
|
36
|
+
|
|
37
|
+
def get_auth_head(auth_url, registry, repository):
    """Build request headers that accept several manifest media types.

    Delegates to ``get_auth_head_no_aws_auth`` with an ``Accept`` value that
    lists the Docker v2 manifest, the Docker manifest list and the OCI image
    manifest, so a manifest list / OCI response can be handled by the caller.
    """
    accepted_media_types = (
        'application/vnd.docker.distribution.manifest.v2+json',
        'application/vnd.docker.distribution.manifest.list.v2+json',
        'application/vnd.oci.image.manifest.v1+json',
    )
    accept_header = ','.join(accepted_media_types)
    return get_auth_head_no_aws_auth(auth_url, registry, repository, accept_header)
|
|
47
|
+
|
|
48
|
+
def _fetch_concrete_manifest(registry, repository, tag_or_digest, auth_head):
    """Fetch a manifest by tag or digest and return the HTTP response.

    Raises:
        RuntimeError: if the registry answers with a non-OK status; the
            message carries the status code and the start of the body.
    """
    manifest_url = f'https://{registry}/v2/{repository}/manifests/{tag_or_digest}'
    resp = requests.get(manifest_url, headers=auth_head, verify=False)
    if resp.ok:
        return resp
    raise RuntimeError(
        f"Failed to fetch manifest {tag_or_digest} (status {resp.status_code}): {resp.text[:300]}"
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def download_layer(layer, layer_count, tmp_img_dir, blobs_resp):
    """Stream one layer blob to disk and decompress it to ``layer.tar``.

    Args:
        layer: Manifest layer entry; only its ``digest`` is read.
        layer_count: Position of the layer, used in the directory name.
        tmp_img_dir: Directory in which the layer directory is created.
        blobs_resp: Streaming HTTP response for the blob; its ``headers`` and
            ``iter_content`` are used.

    Returns:
        ``(layer_id, layer_dir)``: the generated layer name and its directory,
        which contains the decompressed ``layer.tar``.
    """
    ublob = layer['digest']
    # ublob[7:] drops the first seven characters of the digest
    # (the 'sha256:' prefix for sha256 digests).
    layer_id = 'layer_' + str(layer_count) + '_' + ublob[7:]
    layer_label = ""
    layer_dir = tmp_img_dir + '/' + layer_id
    gzip_path = layer_dir + '/layer_gzip.tar'

    sys.stdout.write(layer_label + 'Downloading...')
    sys.stdout.flush()

    # Content-Length can be missing (e.g. chunked responses); in that case the
    # download still proceeds, only the bar stays empty.
    total_bytes = int(blobs_resp.headers.get('Content-Length', 0))
    unit = total_bytes / 50
    acc = 0
    nb_traits = 0
    progress_bar(layer_label, nb_traits)

    os.mkdir(layer_dir)
    with open(gzip_path, "wb") as file:
        for chunk in blobs_resp.iter_content(chunk_size=8192):
            if chunk:
                file.write(chunk)
                # Count what was actually received; chunks may be shorter
                # than the requested chunk size.
                acc += len(chunk)
                if unit and acc > unit:
                    # Cap at the bar width so the closing bracket stays put.
                    if nb_traits < 49:
                        nb_traits += 1
                        progress_bar(layer_label, nb_traits)
                    acc = 0

    sys.stdout.flush()

    # Context managers close both handles even if decompression fails.
    with gzip.open(gzip_path, 'rb') as unzip_layer, \
            open(layer_dir + '/layer.tar', "wb") as file:
        shutil.copyfileobj(unzip_layer, file)
    os.remove(gzip_path)

    return layer_id, layer_dir
|
|
99
|
+
|
|
100
|
+
def pull_image(image_uri):
    """Download an image through the registry HTTP API and pack it into a tar.

    The archive contains the config JSON, one directory per layer (with
    ``layer.tar`` and a ``json`` file), ``manifest.json`` and ``repositories``.

    Args:
        image_uri: ``<registry>/<namespace>/<image>:<tag>``. The value is
            split on ``/`` and ``:``, so this three-part, tagged form is
            expected.

    Returns:
        Path of the tar archive written to the system temp directory.

    Raises:
        RuntimeError: a manifest, config blob or layer request failed, or the
            manifest lists no layers.
        KeyError: the manifest does not have the expected keys.
    """
    image_uri_parts = image_uri.split('/')
    registry = image_uri_parts[0]
    image, tag = image_uri_parts[2].split(':')
    repository = '/'.join([image_uri_parts[1], image])

    auth_url = get_auth_url(registry)
    auth_head = get_auth_head(auth_url, registry, repository)

    # 1. Fetch initial manifest (may be a list or a concrete manifest)
    resp = _fetch_concrete_manifest(registry, repository, tag, auth_head)
    manifest_json = resp.json()

    # 2. Handle manifest list fallback
    if 'config' not in manifest_json:
        if 'manifests' not in manifest_json:
            raise KeyError(
                f"Manifest does not contain 'config' or 'manifests'. Keys: {list(manifest_json.keys())}"
            )
        # Choose amd64 if available, else the first entry
        chosen = None
        for m in manifest_json['manifests']:
            arch = (m.get('platform') or {}).get('architecture')
            if arch in ('amd64', 'x86_64'):
                chosen = m
                break
        if chosen is None:
            chosen = manifest_json['manifests'][0]
        digest = chosen['digest']
        # Re-auth to avoid token expiry
        auth_head = get_auth_head(auth_url, registry, repository)
        resp = _fetch_concrete_manifest(registry, repository, digest, auth_head)
        manifest_json = resp.json()

    if 'config' not in manifest_json or 'layers' not in manifest_json:
        raise KeyError(
            f"Unexpected manifest shape. Keys: {list(manifest_json.keys())}"
        )

    layers = manifest_json['layers']
    if not layers:
        # The 'repositories' file below needs the id of the last layer.
        raise RuntimeError(f"Manifest for {image_uri} lists no layers")

    config = manifest_json['config']['digest']
    config_resp = requests.get(
        f'https://{registry}/v2/{repository}/blobs/{config}',
        headers=auth_head,
        verify=False
    )
    if not config_resp.ok:
        raise RuntimeError(
            f"Failed to fetch config blob {config} (status {config_resp.status_code}): {config_resp.text[:300]}"
        )

    tmp_img_dir = tempfile.gettempdir() + '/' + f'tmp_{image}_{tag}'
    # A previous run that failed midway may have left this directory behind.
    if os.path.isdir(tmp_img_dir):
        shutil.rmtree(tmp_img_dir, onerror=redo_with_write)
    os.mkdir(tmp_img_dir)

    try:
        with open(f'{tmp_img_dir}/{config[7:]}.json', 'wb') as f:
            f.write(config_resp.content)

        content = [{
            'Config': config[7:] + '.json',
            'RepoTags': [image_uri],
            'Layers': []
        }]

        layer_count = 0
        for layer in layers:
            layer_count += 1
            # Refresh auth (avoid expiry)
            auth_head = get_auth_head(auth_url, registry, repository)
            blobs_resp = requests.get(
                f'https://{registry}/v2/{repository}/blobs/{layer["digest"]}',
                headers=auth_head,
                stream=True,
                verify=False
            )
            if not blobs_resp.ok:
                raise RuntimeError(
                    f"Failed to stream layer {layer['digest']} status {blobs_resp.status_code}: {blobs_resp.text[:200]}"
                )
            layer_id, layer_dir = download_layer(layer, layer_count, tmp_img_dir, blobs_resp)
            content[0]['Layers'].append(layer_id + '/layer.tar')

            # Layer json: the last layer carries the image config (minus
            # 'history' and 'rootfs'); every other layer only gets its id.
            if layers[-1]['digest'] == layer['digest']:
                json_obj = json.loads(config_resp.content)
                json_obj.pop('history', None)
                json_obj.pop('rootfs', None)
            else:
                json_obj = {}
            json_obj['id'] = layer_id
            with open(layer_dir + '/json', 'w') as fjson:
                fjson.write(json.dumps(json_obj))

        with open(tmp_img_dir + '/manifest.json', 'w') as mf:
            mf.write(json.dumps(content))

        # repositories file: maps the image name to {tag: id of the last layer}
        repositories_json = {
            '/'.join(image_uri_parts[:-1]) + '/' + image: {tag: layer_id}
        }
        with open(tmp_img_dir + '/repositories', 'w') as rf:
            rf.write(json.dumps(repositories_json))

        docker_tar = tempfile.gettempdir() + '/' + '_'.join([repository.replace('/', '_'), tag]) + '.tar'
        with tarfile.open(docker_tar, "w") as tar:
            tar.add(tmp_img_dir, arcname=os.path.sep)
    finally:
        # Remove the working directory on success and on failure alike.
        shutil.rmtree(tmp_img_dir, onerror=redo_with_write)
    return docker_tar
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def extract_data_from_image(image_name, file_name, location):
    """Copy ``var/task/<file_name>`` out of an image tar into ``location``.

    The image tar is scanned for nested ``.tar`` members (the layers). The
    first layer holding entries whose path starts with ``var/task/<file_name>``
    is extracted to a private scratch directory, and the resulting folder is
    copied to ``os.path.join(location, file_name)``, replacing any directory
    already there.

    Args:
        image_name: Path of the image tar archive.
        file_name: Name of the folder under ``var/task/`` to extract.
        location: Directory that receives the extracted folder.

    Raises:
        FileNotFoundError: if no layer contains ``var/task/<file_name>``.
    """
    wanted_prefix = "var/task/" + file_name
    # Private scratch dir instead of a shared '<tmp>/var', so parallel or
    # interrupted runs cannot collide.
    scratch_dir = tempfile.mkdtemp()
    try:
        found = False
        with tarfile.open(image_name, 'r') as tar:
            for t in tar.getmembers():
                if '.tar' not in t.name or not t.isfile():
                    continue
                with tarfile.open(fileobj=tar.extractfile(t)) as tar_layer:
                    # Plain prefix test: file_name is a literal path
                    # component, not a regular expression.
                    files = [tl for tl in tar_layer.getmembers()
                             if tl.name.startswith(wanted_prefix)]
                    if files:
                        # NOTE(review): member paths are not sanitised beyond
                        # the prefix check; only use with trusted images.
                        tar_layer.extractall(members=files, path=scratch_dir)
                        found = True
                        break
        if not found:
            raise FileNotFoundError(
                f"'{wanted_prefix}' was not found in any layer of {image_name}"
            )

        destination = os.path.join(location, file_name)
        # Replace an earlier copy at the real destination (not in the CWD).
        if os.path.isdir(destination):
            shutil.rmtree(destination, onerror=redo_with_write)
        shutil.copytree(os.path.join(scratch_dir, 'var', 'task', file_name), destination)
    finally:
        shutil.rmtree(scratch_dir, onerror=redo_with_write)
|
|
229
|
+
|
|
230
|
+
def download_data(repository, location="./"):
    """Pull a data image and unpack its data folder into ``location``.

    Args:
        repository: Image URI; its third ``/``-separated part, up to
            ``-repository``, is used as the name of the folder to extract.
        location: Directory that receives the extracted folder.
    """
    data_zip_name = repository.split('/')[2].split('-repository')[0]
    docker_tar = pull_image(repository)
    try:
        extract_data_from_image(docker_tar, data_zip_name, location)
    finally:
        # The image tar is only an intermediate artefact; drop it even when
        # extraction fails so it does not pile up in the temp directory.
        os.remove(docker_tar)
    print('\n\nData downloaded successfully.')
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def import_quickstart_data(tutorial, section="modelplayground"):
|
|
239
|
+
from aimodelshare.data_sharing.download_data import download_data
|
|
240
|
+
import tensorflow as tf
|
|
241
|
+
import os
|
|
242
|
+
import pickle
|
|
243
|
+
import shutil
|
|
244
|
+
import pandas as pd
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
#Download Quick Start materials
|
|
248
|
+
if all([tutorial == "flowers", section == "modelplayground"]):
|
|
249
|
+
quickstart_repository = "public.ecr.aws/z5w0c9e9/quickstart_materials-repository:latest"
|
|
250
|
+
existing_folder = 'flower_competition_data'
|
|
251
|
+
|
|
252
|
+
if all([tutorial == "flowers", section == "competition"]):
|
|
253
|
+
quickstart_repository = "public.ecr.aws/z5w0c9e9/quickstart_flowers_competition-repository:latest"
|
|
254
|
+
existing_folder = 'flower_competition_data'
|
|
255
|
+
|
|
256
|
+
if all([tutorial == "mnist", section == "modelplayground"]):
|
|
257
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/fashion_mnist_quickstart_materials-repository:latest"
|
|
258
|
+
existing_folder = 'fashion_mnist_competition_data'
|
|
259
|
+
if all([tutorial == "mnist", section == "competition"]):
|
|
260
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/quickstart_mnist_competition-repository:latest"
|
|
261
|
+
existing_folder = 'fashion_mnist_competition_data'
|
|
262
|
+
|
|
263
|
+
if all([tutorial == "titanic", section == "modelplayground"]):
|
|
264
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/titanic_quickstart-repository:latest"
|
|
265
|
+
existing_folder = 'titanic_competition_data'
|
|
266
|
+
|
|
267
|
+
if all([tutorial == "cars", section == "modelplayground"]):
|
|
268
|
+
quickstart_repository = "public.ecr.aws/z5w0c9e9/quickstart_car_sales_competition-repository:latest"
|
|
269
|
+
existing_folder = 'used_car_competition_data'
|
|
270
|
+
|
|
271
|
+
if all([tutorial == "clickbait", section == "modelplayground"]):
|
|
272
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/quickstart_clickbait_materials-repository:latest"
|
|
273
|
+
existing_folder = 'clickbait_competition_data'
|
|
274
|
+
|
|
275
|
+
if all([tutorial == "covid_tweets", section == "modelplayground"]):
|
|
276
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/quickstart_covid_competition-repository:latest"
|
|
277
|
+
existing_folder = 'covid_tweet_competition_data'
|
|
278
|
+
|
|
279
|
+
if all([tutorial == "sports", section == "modelplayground"]):
|
|
280
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/sports_quick_start_materials-repository:latest"
|
|
281
|
+
existing_folder = 'sports_clips_competition_data'
|
|
282
|
+
if all([tutorial == "sports", section == "competition"]):
|
|
283
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/quickstart_sports_competition-repository:latest"
|
|
284
|
+
existing_folder = 'sports_clips_competition_data'
|
|
285
|
+
|
|
286
|
+
if all([tutorial == "dogs", section == "modelplayground"]):
|
|
287
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/dog_breed_quickstart_materials-repository:latest"
|
|
288
|
+
existing_folder = 'dog_competition_data'
|
|
289
|
+
if all([tutorial == "dogs", section == "competition"]):
|
|
290
|
+
quickstart_repository = "public.ecr.aws/y2e2a1d6/quickstart_dog_breed_competition-repository:latest"
|
|
291
|
+
existing_folder = 'dog_competition_data'
|
|
292
|
+
|
|
293
|
+
if all([tutorial == "imdb", section == "modelplayground"]):
|
|
294
|
+
quickstart_repository = "public.ecr.aws/z5w0c9e9/imdb_quickstart_materials-repository:latest"
|
|
295
|
+
existing_folder = 'imdb_competition_data'
|
|
296
|
+
|
|
297
|
+
download_data(quickstart_repository)
|
|
298
|
+
|
|
299
|
+
#Delete pre-existing tutorial folders
|
|
300
|
+
if os.path.exists(existing_folder):
|
|
301
|
+
shutil.rmtree(existing_folder, onerror=redo_with_write)
|
|
302
|
+
|
|
303
|
+
#{{{ Prepare modelplayground materials
|
|
304
|
+
if section == "modelplayground":
|
|
305
|
+
print("\nPreparing downloaded files for use...")
|
|
306
|
+
|
|
307
|
+
if tutorial == "dogs":
|
|
308
|
+
#instantiate model
|
|
309
|
+
model = tf.keras.models.load_model('dog_breed_quickstart_materials/model.h5')
|
|
310
|
+
|
|
311
|
+
#unpack data
|
|
312
|
+
y_train = pd.read_csv("dog_breed_quickstart_materials/y_train.csv")
|
|
313
|
+
|
|
314
|
+
if tutorial == "covid_tweets":
|
|
315
|
+
#unpack data
|
|
316
|
+
X_train = pd.read_csv("quickstart_covid_competition/X_train.csv").squeeze("columns")
|
|
317
|
+
X_test = pd.read_csv("quickstart_covid_competition/X_test.csv").squeeze("columns")
|
|
318
|
+
y_test_labels = pd.read_csv("quickstart_covid_competition/y_test_labels.csv").squeeze("columns")
|
|
319
|
+
y_train_labels = pd.read_csv("quickstart_covid_competition/y_train_labels.csv").squeeze("columns")
|
|
320
|
+
# example data
|
|
321
|
+
example_data = X_train[50:55]
|
|
322
|
+
|
|
323
|
+
#move data files to cometition folder
|
|
324
|
+
os.mkdir('covid_tweet_competition_data')
|
|
325
|
+
|
|
326
|
+
files = ['quickstart_covid_competition/X_train.csv',
|
|
327
|
+
'quickstart_covid_competition/X_test.csv',
|
|
328
|
+
'quickstart_covid_competition/y_train_labels.csv']
|
|
329
|
+
|
|
330
|
+
for f in files:
|
|
331
|
+
shutil.move(f, 'covid_tweet_competition_data')
|
|
332
|
+
|
|
333
|
+
if tutorial == "imdb":
|
|
334
|
+
#unpack data
|
|
335
|
+
X_train = pd.read_csv("imdb_quickstart_materials/X_train.csv").squeeze("columns")
|
|
336
|
+
X_test = pd.read_csv("imdb_quickstart_materials/X_test.csv").squeeze("columns")
|
|
337
|
+
with open("imdb_quickstart_materials/y_train_labels.json", "r") as f:
|
|
338
|
+
y_train_labels = json.load(f)
|
|
339
|
+
with open("imdb_quickstart_materials/y_test_labels.json", "r") as f:
|
|
340
|
+
y_test_labels = json.load(f)
|
|
341
|
+
import pandas as pd
|
|
342
|
+
y_train_labels=pd.Series(y_train_labels)
|
|
343
|
+
y_test_labels=pd.Series(y_test_labels)
|
|
344
|
+
# example data
|
|
345
|
+
example_data = X_train[50:55]
|
|
346
|
+
|
|
347
|
+
#instantiate models
|
|
348
|
+
lstm_model = tf.keras.models.load_model('imdb_quickstart_materials/model_1.h5')
|
|
349
|
+
lstm_model2 = tf.keras.models.load_model('imdb_quickstart_materials/model_2.h5')
|
|
350
|
+
|
|
351
|
+
#move data files to cometition folder
|
|
352
|
+
os.mkdir('imdb_competition_data')
|
|
353
|
+
|
|
354
|
+
files = ['imdb_quickstart_materials/X_train.csv',
|
|
355
|
+
'imdb_quickstart_materials/X_test.csv',
|
|
356
|
+
'imdb_quickstart_materials/y_train_labels.csv']
|
|
357
|
+
|
|
358
|
+
for f in files:
|
|
359
|
+
shutil.move(f, 'imdb_competition_data')
|
|
360
|
+
|
|
361
|
+
if tutorial == "flowers":
|
|
362
|
+
#instantiate model
|
|
363
|
+
model = tf.keras.models.load_model('quickstart_materials/flowermodel.h5')
|
|
364
|
+
|
|
365
|
+
#unpack data
|
|
366
|
+
with open("quickstart_materials/y_train_labels.txt", "rb") as fp:
|
|
367
|
+
y_train_labels = pickle.load(fp)
|
|
368
|
+
|
|
369
|
+
if tutorial == "mnist":
|
|
370
|
+
#instantiate model
|
|
371
|
+
model = tf.keras.models.load_model('fashion_mnist_quickstart_materials/mnist_model_1.h5')
|
|
372
|
+
|
|
373
|
+
#unpack data
|
|
374
|
+
with open("fashion_mnist_quickstart_materials/y_train_labels.pkl", "rb") as fp:
|
|
375
|
+
y_train_labels = pickle.load(fp)
|
|
376
|
+
|
|
377
|
+
if tutorial == "sports":
|
|
378
|
+
#instantiate model
|
|
379
|
+
model = tf.keras.models.load_model('sports_quick_start_materials/video_1.h5')
|
|
380
|
+
|
|
381
|
+
#unpack data
|
|
382
|
+
y_train_labels = pd.read_csv("sports_quick_start_materials/y_train.csv")
|
|
383
|
+
|
|
384
|
+
if tutorial == "clickbait":
|
|
385
|
+
import pandas as pd
|
|
386
|
+
# suppress tf warning
|
|
387
|
+
import tensorflow as tf
|
|
388
|
+
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
|
|
389
|
+
|
|
390
|
+
#instantiate models
|
|
391
|
+
lstm_model = tf.keras.models.load_model('quickstart_clickbait_materials/lstm_model1.h5')
|
|
392
|
+
lstm_model2 = tf.keras.models.load_model('quickstart_clickbait_materials/lstm_model2.h5')
|
|
393
|
+
|
|
394
|
+
# bring in data
|
|
395
|
+
clickbait = pd.read_csv('quickstart_clickbait_materials/clickbait_data', sep="\n", header = None)
|
|
396
|
+
clickbait['label'] = "clickbait"
|
|
397
|
+
clickbait.columns = ['headline', 'label']
|
|
398
|
+
|
|
399
|
+
not_clickbait = pd.read_csv('quickstart_clickbait_materials/non_clickbait_data', sep="\n", header = None)
|
|
400
|
+
not_clickbait['label'] = "not clickbait"
|
|
401
|
+
not_clickbait.columns = ['headline', 'label']
|
|
402
|
+
|
|
403
|
+
# train/test/split
|
|
404
|
+
from sklearn.model_selection import train_test_split
|
|
405
|
+
X = clickbait.append(not_clickbait)
|
|
406
|
+
y = X['label']
|
|
407
|
+
X = X.drop(['label'], axis=1)
|
|
408
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=942)
|
|
409
|
+
X_test.reset_index(drop=True, inplace=True)
|
|
410
|
+
X_test = X_test.squeeze()
|
|
411
|
+
X_train.reset_index(drop=True, inplace=True)
|
|
412
|
+
X_train = X_train.squeeze()
|
|
413
|
+
|
|
414
|
+
# exampledata
|
|
415
|
+
example_data = X_train[0:5]
|
|
416
|
+
|
|
417
|
+
# Create data directory for competition
|
|
418
|
+
X_train.to_csv("X_train.csv", index=False)
|
|
419
|
+
X_test.to_csv("X_test.csv", index=False)
|
|
420
|
+
y_train.to_csv("y_train.csv", index=False)
|
|
421
|
+
|
|
422
|
+
os.mkdir('clickbait_competition_data')
|
|
423
|
+
|
|
424
|
+
files = ['X_train.csv', 'X_test.csv', 'y_train.csv']
|
|
425
|
+
|
|
426
|
+
for f in files:
|
|
427
|
+
shutil.move(f, 'clickbait_competition_data')
|
|
428
|
+
|
|
429
|
+
if tutorial == "titanic":
|
|
430
|
+
from sklearn.model_selection import train_test_split
|
|
431
|
+
import pandas as pd
|
|
432
|
+
#read in data
|
|
433
|
+
data = pd.read_csv("titanic_quickstart/titanic_data.csv")
|
|
434
|
+
y = data['survived']
|
|
435
|
+
y = y.map({0: 'died', 1: 'survived'})
|
|
436
|
+
X = data.drop(['survived','sibsp','parch','ticket','name','cabin','boat','body','home.dest'], axis=1)
|
|
437
|
+
#create subset as exampledata
|
|
438
|
+
example_data = pd.DataFrame(X[0:4])
|
|
439
|
+
# create data directory for competition
|
|
440
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
|
|
441
|
+
training_data = pd.merge(X_train, y_train, left_index=True, right_index=True)
|
|
442
|
+
training_data = training_data.drop(list(training_data.filter(like='Unnamed')), axis=1)
|
|
443
|
+
training_data.to_csv("training_data.csv", index=False)
|
|
444
|
+
|
|
445
|
+
test_data = X_test
|
|
446
|
+
test_data = test_data.drop(list(test_data.filter(like='Unnamed')), axis=1)
|
|
447
|
+
test_data.to_csv("test_data.csv", index=False)
|
|
448
|
+
|
|
449
|
+
os.mkdir('titanic_competition_data')
|
|
450
|
+
files = ['training_data.csv',
|
|
451
|
+
'test_data.csv']
|
|
452
|
+
|
|
453
|
+
for f in files:
|
|
454
|
+
shutil.move(f, 'titanic_competition_data')
|
|
455
|
+
|
|
456
|
+
#make y_test_labels for competition
|
|
457
|
+
y_test_labels = y_test.to_list()
|
|
458
|
+
|
|
459
|
+
if tutorial == "cars":
|
|
460
|
+
from sklearn.model_selection import train_test_split
|
|
461
|
+
|
|
462
|
+
# read in data
|
|
463
|
+
import pandas as pd
|
|
464
|
+
data = pd.read_csv("quickstart_car_sales_competition/used_car_dataset.csv")
|
|
465
|
+
y = data['selling_price']
|
|
466
|
+
X = data.drop(['selling_price', 'torque', 'name'], axis=1)
|
|
467
|
+
|
|
468
|
+
#Data Prep:
|
|
469
|
+
# convert rupees to $ (for smaller MSEs)
|
|
470
|
+
y = y.mul(.014)
|
|
471
|
+
# A: Split units from mileage and convert units
|
|
472
|
+
Correct_Mileage= []
|
|
473
|
+
for i in X.mileage:
|
|
474
|
+
if str(i).endswith('km/kg'):
|
|
475
|
+
i = i[:-6]
|
|
476
|
+
i = float(i)*1.40
|
|
477
|
+
Correct_Mileage.append(float(i))
|
|
478
|
+
elif str(i).endswith('kmpl'):
|
|
479
|
+
i = i[:-5]
|
|
480
|
+
Correct_Mileage.append(float(i))
|
|
481
|
+
else:
|
|
482
|
+
Correct_Mileage.append(None)
|
|
483
|
+
X['mileage']=Correct_Mileage
|
|
484
|
+
|
|
485
|
+
#B: Split units from engine, & max_pwer
|
|
486
|
+
X['engine'] = X['engine'].str.replace(' CC', '')
|
|
487
|
+
X['engine'] = pd.to_numeric(X['engine'])
|
|
488
|
+
|
|
489
|
+
X['max_power'] = X['max_power'].str.replace(' bhp', '')
|
|
490
|
+
X['max_power'] = pd.to_numeric(X['max_power'])
|
|
491
|
+
|
|
492
|
+
#create subset as exampledata
|
|
493
|
+
example_data = pd.DataFrame(X[0:4])
|
|
494
|
+
|
|
495
|
+
#create data directory for competition
|
|
496
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
|
|
497
|
+
training_data = X_train
|
|
498
|
+
training_data = pd.merge(X_train, y_train, left_index=True, right_index=True)
|
|
499
|
+
training_data.to_csv("training_data.csv", index=False)
|
|
500
|
+
|
|
501
|
+
test_data = X_test
|
|
502
|
+
test_data.to_csv("test_data.csv", index=False)
|
|
503
|
+
|
|
504
|
+
os.mkdir('used_car_competition_data')
|
|
505
|
+
files = ['training_data.csv',
|
|
506
|
+
'test_data.csv']
|
|
507
|
+
for f in files:
|
|
508
|
+
shutil.move(f, 'used_car_competition_data')
|
|
509
|
+
#}}}
|
|
510
|
+
|
|
511
|
+
#{{{ prepare competition materials
|
|
512
|
+
if section == "competition":
|
|
513
|
+
print("\nPreparing downloaded files for use...")
|
|
514
|
+
|
|
515
|
+
if tutorial == "flowers":
|
|
516
|
+
#Instantiate Model
|
|
517
|
+
model_2 = tf.keras.models.load_model('quickstart_flowers_competition/flowermodel_2.h5')
|
|
518
|
+
|
|
519
|
+
#unpack data
|
|
520
|
+
with open("quickstart_flowers_competition/y_test_labels.txt", "rb") as fp:
|
|
521
|
+
y_test_labels = pickle.load(fp)
|
|
522
|
+
|
|
523
|
+
#move data files to folder to upload with create_competition
|
|
524
|
+
os.mkdir('flower_competition_data')
|
|
525
|
+
|
|
526
|
+
folders = ['quickstart_flowers_competition/test_images',
|
|
527
|
+
'quickstart_flowers_competition/train_images']
|
|
528
|
+
|
|
529
|
+
for f in folders:
|
|
530
|
+
shutil.move(f, 'flower_competition_data')
|
|
531
|
+
|
|
532
|
+
if tutorial == "mnist":
|
|
533
|
+
#Instantiate Model
|
|
534
|
+
model_2 = tf.keras.models.load_model('quickstart_mnist_competition/mnist_model_2.h5')
|
|
535
|
+
|
|
536
|
+
#unpack data
|
|
537
|
+
with open("quickstart_mnist_competition/y_test_labels.pkl", "rb") as fp:
|
|
538
|
+
y_test_labels = pickle.load(fp)
|
|
539
|
+
|
|
540
|
+
#move data files to folder to upload with create_competition
|
|
541
|
+
os.mkdir('fashion_mnist_competition_data')
|
|
542
|
+
|
|
543
|
+
folders = ['quickstart_mnist_competition/test_data',
|
|
544
|
+
'quickstart_mnist_competition/training_data',
|
|
545
|
+
'fashion_mnist_quickstart_materials/y_train_labels.pkl']
|
|
546
|
+
|
|
547
|
+
for f in folders:
|
|
548
|
+
shutil.move(f, 'fashion_mnist_competition_data')
|
|
549
|
+
|
|
550
|
+
if tutorial == "dogs":
|
|
551
|
+
#Instantiate Model
|
|
552
|
+
model_2 = tf.keras.models.load_model('quickstart_dog_breed_competition/model_2.h5')
|
|
553
|
+
|
|
554
|
+
#unpack data
|
|
555
|
+
with open("quickstart_dog_breed_competition/y_test_labels.txt", "rb") as fp:
|
|
556
|
+
y_test_labels = pickle.load(fp)
|
|
557
|
+
|
|
558
|
+
#move data files to folder to upload with create_competition
|
|
559
|
+
os.mkdir('dog_competition_data')
|
|
560
|
+
|
|
561
|
+
folders = ['quickstart_dog_breed_competition/dog_breed_competition_data/test_images',
|
|
562
|
+
'quickstart_dog_breed_competition/dog_breed_competition_data/train_images']
|
|
563
|
+
|
|
564
|
+
for f in folders:
|
|
565
|
+
shutil.move(f, 'dog_competition_data')
|
|
566
|
+
|
|
567
|
+
if tutorial == "sports":
|
|
568
|
+
model_2 = tf.keras.models.load_model('quickstart_sports_competition/video_2.h5')
|
|
569
|
+
y_test = pd.read_csv("quickstart_sports_competition/y_test.csv")
|
|
570
|
+
y_test_labels = y_test.idxmax(axis=1)
|
|
571
|
+
os.mkdir('sports_clips_competition_data')
|
|
572
|
+
folders = ['quickstart_sports_competition/clips_test.zip',
|
|
573
|
+
'quickstart_sports_competition/clips_train.zip']
|
|
574
|
+
for f in folders:
|
|
575
|
+
shutil.move(f, 'sports_clips_competition_data')
|
|
576
|
+
#}}}
|
|
577
|
+
|
|
578
|
+
success_message = ("\nSuccess! Your Quick Start materials have been downloaded. \n"
|
|
579
|
+
"You are now ready to run the tutorial.")
|
|
580
|
+
|
|
581
|
+
print(success_message)
|
|
582
|
+
|
|
583
|
+
if all([tutorial == "flowers", section == "modelplayground"]):
|
|
584
|
+
return model, y_train_labels
|
|
585
|
+
|
|
586
|
+
if all ([tutorial == "flowers", section == "competition"]):
|
|
587
|
+
return model_2, y_test_labels
|
|
588
|
+
|
|
589
|
+
if all([tutorial == "mnist", section == "modelplayground"]):
|
|
590
|
+
return model, y_train_labels
|
|
591
|
+
|
|
592
|
+
if all([tutorial == "mnist", section == "competition"]):
|
|
593
|
+
return model_2, y_test_labels
|
|
594
|
+
|
|
595
|
+
if all([tutorial == "dogs", section == "modelplayground"]):
|
|
596
|
+
return model, y_train
|
|
597
|
+
|
|
598
|
+
if all ([tutorial == "dogs", section == "competition"]):
|
|
599
|
+
return model_2, y_test_labels
|
|
600
|
+
|
|
601
|
+
if all([tutorial == "sports", section == "modelplayground"]):
|
|
602
|
+
return model, y_train_labels
|
|
603
|
+
|
|
604
|
+
if all ([tutorial == "sports", section == "competition"]):
|
|
605
|
+
return model_2, y_test, y_test_labels
|
|
606
|
+
|
|
607
|
+
if tutorial == "titanic":
|
|
608
|
+
return X_train, X_test, y_train, y_test, example_data, y_test_labels
|
|
609
|
+
|
|
610
|
+
if tutorial == "cars":
|
|
611
|
+
return X_train, X_test, y_train, y_test, example_data
|
|
612
|
+
|
|
613
|
+
if tutorial == "clickbait":
|
|
614
|
+
return X_train, X_test, y_train, y_test, example_data, lstm_model, lstm_model2
|
|
615
|
+
|
|
616
|
+
if tutorial == "imdb":
|
|
617
|
+
return X_train, X_test, y_train_labels, y_test_labels, example_data, lstm_model, lstm_model2
|
|
618
|
+
|
|
619
|
+
if tutorial == "covid_tweets":
|
|
620
|
+
return X_train, X_test, y_train_labels, y_test_labels, example_data
|