cached-openai 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cached_openai/__init__.py +10 -0
- cached_openai/cached_client.py +431 -0
- cached_openai/main.py +150 -0
- cached_openai/materialize_utils.py +118 -0
- cached_openai/utils.py +194 -0
- cached_openai-0.0.1.dist-info/METADATA +115 -0
- cached_openai-0.0.1.dist-info/RECORD +9 -0
- cached_openai-0.0.1.dist-info/WHEEL +4 -0
- cached_openai-0.0.1.dist-info/licenses/LICENSE +7 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Make the main entrypoints into the package available at the top level of the package.
# DEV_MODE is imported only so that it can be consulted below.
from .main import OpenAI, AsyncOpenAI, DEV_MODE

# Make the materialization function available at the top level of the package, but
# only if we're in dev mode
if DEV_MODE:
    from .main import materialize

# Remove DEV_MODE from the namespace so that it is not exposed as a package attribute
del DEV_MODE
|
|
@@ -0,0 +1,431 @@
|
|
|
1
|
+
import openai
|
|
2
|
+
import hashlib
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import base64
|
|
6
|
+
import pathlib
|
|
7
|
+
import time
|
|
8
|
+
import asyncio
|
|
9
|
+
import pickle
|
|
10
|
+
import copy
|
|
11
|
+
import requests
|
|
12
|
+
import io
|
|
13
|
+
import struct
|
|
14
|
+
import inspect
|
|
15
|
+
|
|
16
|
+
# There are some keywords that - when provided to an OpenAI function - do not change
|
|
17
|
+
# the result; we should ignore these completely when caching results
|
|
18
|
+
IRRELEVANT_KWARGS = ['timeout']
|
|
19
|
+
|
|
20
|
+
class CachedClient():
|
|
21
|
+
'''
|
|
22
|
+
This CachedClient object replicates the openai.OpenAI client object, but allows the loading
|
|
23
|
+
and saving of results to or from cache every time a request is made.
|
|
24
|
+
|
|
25
|
+
It can be created in two circumstances:
|
|
26
|
+
- When it is created by the user, it will be created with stem = []
|
|
27
|
+
- When the user accesses a method of this class, a new class is recursively created with
|
|
28
|
+
the stem extended by the attribute accessed. For example, if the user calls
|
|
29
|
+
client.chat.completion.create
|
|
30
|
+
the last CachedClient instance will have stem = ['chat', 'completion', 'create']. This
|
|
31
|
+
final instance can then be called, which will called the corresponding function in the
|
|
32
|
+
original OpenAI library
|
|
33
|
+
'''
|
|
34
|
+
|
|
35
|
+
def __init__(self,
|
|
36
|
+
api_key : str | None ,
|
|
37
|
+
cache : dict ,
|
|
38
|
+
verbose : bool ,
|
|
39
|
+
dev_mode : bool ,
|
|
40
|
+
is_async : bool ,
|
|
41
|
+
delay_responses : bool ,
|
|
42
|
+
temp_cache_file : str ,
|
|
43
|
+
used_keys_file : str ,
|
|
44
|
+
stem : list[str] = [] ,
|
|
45
|
+
last_entry_returned : dict = {} ):
|
|
46
|
+
|
|
47
|
+
# Store variables
|
|
48
|
+
self._api_key = api_key
|
|
49
|
+
self._cache = cache
|
|
50
|
+
self._verbose = verbose
|
|
51
|
+
self._dev_mode = dev_mode
|
|
52
|
+
self._is_async = is_async
|
|
53
|
+
self._delay_responses = delay_responses
|
|
54
|
+
self._temp_cache_file = temp_cache_file
|
|
55
|
+
self._used_keys_file = used_keys_file
|
|
56
|
+
self._stem = stem
|
|
57
|
+
|
|
58
|
+
# If we were not given an API key, check whether one is available in an openai_key.txt file
|
|
59
|
+
if self._api_key is None:
|
|
60
|
+
try:
|
|
61
|
+
with open('openai_key.txt', 'r') as f:
|
|
62
|
+
self._api_key = f.read().strip()
|
|
63
|
+
|
|
64
|
+
if verbose:
|
|
65
|
+
print('Read API key from openai_key.txt')
|
|
66
|
+
except:
|
|
67
|
+
pass
|
|
68
|
+
|
|
69
|
+
# In some cases, we have multiple results for a single set of keys; this is so that
|
|
70
|
+
# we can simulate the "real" OpenAI API that would return different results every time
|
|
71
|
+
# it is run. Initialize a dictionary to store how many responses we've returned for a
|
|
72
|
+
# given key, so that we know the next one we should return next time it is called
|
|
73
|
+
self._last_entry_returned = last_entry_returned
|
|
74
|
+
|
|
75
|
+
def __getattr__(self, name : str):
|
|
76
|
+
'''
|
|
77
|
+
This function is called whenever an instance of this class is accessed with a .;
|
|
78
|
+
for example, client.chat.
|
|
79
|
+
|
|
80
|
+
When this happens, we add the attribute being accessed to self._stem, and return
|
|
81
|
+
a new CachedClient instance with that new stem.
|
|
82
|
+
'''
|
|
83
|
+
|
|
84
|
+
return CachedClient(api_key = self._api_key,
|
|
85
|
+
cache = self._cache,
|
|
86
|
+
verbose = self._verbose,
|
|
87
|
+
dev_mode = self._dev_mode,
|
|
88
|
+
is_async = self._is_async,
|
|
89
|
+
delay_responses = self._delay_responses,
|
|
90
|
+
temp_cache_file = self._temp_cache_file,
|
|
91
|
+
used_keys_file = self._used_keys_file,
|
|
92
|
+
stem = self._stem + [name],
|
|
93
|
+
last_entry_returned = self._last_entry_returned )
|
|
94
|
+
|
|
95
|
+
def get_cache_key(self, kwargs, hash_key : bool, strip_seed : bool = False):
|
|
96
|
+
'''
|
|
97
|
+
This function returns the cache key for the fuction described in self._stem called with
|
|
98
|
+
parameters kwargs. If hash_key is True, the JSon key will be hashed, otherwise it will
|
|
99
|
+
be returned raw
|
|
100
|
+
|
|
101
|
+
The following modifications are made:
|
|
102
|
+
- If strip_seed is True, the seed parameter is removed from the kwargs
|
|
103
|
+
- Any of the arguments in IRRELEVANT_KWARGS are in kwargs, they are removed
|
|
104
|
+
- If 'with_raw_response' is in the stem, it is stripped from it; when this is is included
|
|
105
|
+
in an OpenAI API call, it is because the developer wants to get the raw response, which
|
|
106
|
+
includes the number of token's left in the user's quota. It makes no sense to store this
|
|
107
|
+
in the cache as it will be different every time
|
|
108
|
+
'''
|
|
109
|
+
|
|
110
|
+
if strip_seed:
|
|
111
|
+
kwargs = {k:v for k,v in kwargs.items() if k != 'seed'}
|
|
112
|
+
|
|
113
|
+
this_stem = self._stem
|
|
114
|
+
this_stem = [i for i in this_stem if i != 'with_raw_response']
|
|
115
|
+
|
|
116
|
+
# Remove any irrelevant kwargs
|
|
117
|
+
kwargs = {k:v for k,v in kwargs.items() if k not in IRRELEVANT_KWARGS}
|
|
118
|
+
|
|
119
|
+
key = json.dumps({'stem':this_stem, 'kwargs':kwargs}, sort_keys=True)
|
|
120
|
+
if hash_key:
|
|
121
|
+
return hashlib.md5(key.encode('utf-8')).hexdigest()
|
|
122
|
+
else:
|
|
123
|
+
return key
|
|
124
|
+
|
|
125
|
+
def read_from_cache(self, kwargs):
|
|
126
|
+
'''
|
|
127
|
+
This function will attempt to read the cached result for the function described in self._stem
|
|
128
|
+
called with parameters kwargs.
|
|
129
|
+
|
|
130
|
+
It will return a dictionary with two entries:
|
|
131
|
+
- out : the entry in question
|
|
132
|
+
- run_time : the time the function took to run when it was initially added to the cache
|
|
133
|
+
If the entry is not found int he cache, None is returned
|
|
134
|
+
'''
|
|
135
|
+
|
|
136
|
+
# Remove any irrelevant kwargs
|
|
137
|
+
kwargs = {k:v for k,v in kwargs.items() if k not in IRRELEVANT_KWARGS}
|
|
138
|
+
|
|
139
|
+
# Try and the find the value in the cache; first, look for the raw JSon, and if it's not found
|
|
140
|
+
# look for the hashed key. Do NOT strip the seed - if the user intentionally added a seed argument,
|
|
141
|
+
# we want THAT entry specifically
|
|
142
|
+
key = self.get_cache_key(kwargs, hash_key = False)
|
|
143
|
+
if key not in self._cache:
|
|
144
|
+
key = self.get_cache_key(kwargs, hash_key = True)
|
|
145
|
+
|
|
146
|
+
# Check whether we have a result
|
|
147
|
+
if key in self._cache:
|
|
148
|
+
print('Found a saved result in the cache')
|
|
149
|
+
|
|
150
|
+
# Retrieve the entry from the cache
|
|
151
|
+
cache_entry = self._cache[key]
|
|
152
|
+
|
|
153
|
+
# Check whether this cache_entry is of one of two specific types:
|
|
154
|
+
# - A pointer (a dictionary with a single entry with the key 'TARGET'), pointing to
|
|
155
|
+
# another entry in the cache. If we have such an entry, we need to follow it
|
|
156
|
+
# - A list, in which case there are many possible results for this key, and we need
|
|
157
|
+
# to return the next one
|
|
158
|
+
|
|
159
|
+
while (type(cache_entry) == list) or ('TARGET' in cache_entry):
|
|
160
|
+
if type(cache_entry) == list:
|
|
161
|
+
# Find the next entry to retrieve from the list, wrapping back to the front of
|
|
162
|
+
# the list if we reach the end
|
|
163
|
+
self._last_entry_returned[key] = (self._last_entry_returned.get(key, -1) + 1) % len(cache_entry)
|
|
164
|
+
cache_entry = cache_entry[self._last_entry_returned[key]]
|
|
165
|
+
elif 'TARGET' in cache_entry:
|
|
166
|
+
# Log the fact we used this key, and then follow the pointer
|
|
167
|
+
if self._dev_mode:
|
|
168
|
+
with open(self._used_keys_file, 'a') as f : f.write(key + '\n')
|
|
169
|
+
|
|
170
|
+
key = cache_entry['TARGET']
|
|
171
|
+
cache_entry = self._cache[key]
|
|
172
|
+
|
|
173
|
+
# Record the fact we've used the key
|
|
174
|
+
if self._dev_mode:
|
|
175
|
+
with open(self._used_keys_file, 'a') as f: f.write(key + '\n')
|
|
176
|
+
|
|
177
|
+
# Retrieve the output that was saved from OpenAI
|
|
178
|
+
out = cache_entry['out']
|
|
179
|
+
|
|
180
|
+
# If the cache_entry contains a 'saved_images' entry, handle the images returned by the
|
|
181
|
+
# API
|
|
182
|
+
if 'saved_images' in cache_entry:
|
|
183
|
+
# Make sure we don't mutate the original object in the cache
|
|
184
|
+
out = copy.deepcopy(out)
|
|
185
|
+
|
|
186
|
+
# Get the saved images that were downloaded the cache
|
|
187
|
+
saved_images = cache_entry['saved_images']
|
|
188
|
+
|
|
189
|
+
# out.data is the entry in the OpenAI object that contains the image URLs. saved_images
|
|
190
|
+
# contains the actual image data. We want to save those images as a file, and replace
|
|
191
|
+
# the URL in out.data with the URL of the new file
|
|
192
|
+
for im, saved_im in zip(out.data, saved_images):
|
|
193
|
+
if saved_im is not None:
|
|
194
|
+
# Create a file name for this image based on the hash of the URL
|
|
195
|
+
file_name = hashlib.md5((key + im.url).encode('utf-8')).hexdigest() + '.png'
|
|
196
|
+
|
|
197
|
+
# Check whether the images folder exists; if not, create it
|
|
198
|
+
if not os.path.exists('images'):
|
|
199
|
+
os.mkdir('images')
|
|
200
|
+
|
|
201
|
+
# Save the image there
|
|
202
|
+
with open(f'images/{file_name}', 'wb') as f:
|
|
203
|
+
f.write(saved_im)
|
|
204
|
+
|
|
205
|
+
# Alter the URL in the output object
|
|
206
|
+
im.url = pathlib.Path(f'images/{file_name}').resolve().as_uri()
|
|
207
|
+
|
|
208
|
+
# If the cache_entry contains an 'audio_file' entry, deal with the audio file
|
|
209
|
+
if 'audio_file' in cache_entry:
|
|
210
|
+
# Create a class that will allow us to use a iter_bytes method and a stream_to_file
|
|
211
|
+
# method
|
|
212
|
+
class Stream:
|
|
213
|
+
def __init__(self, bytes):
|
|
214
|
+
self.byte_stream = io.ByteIO(bytes)
|
|
215
|
+
|
|
216
|
+
def iter_bytes(self):
|
|
217
|
+
while True:
|
|
218
|
+
chunk = self.byte_stream(1024)
|
|
219
|
+
if not chunk:
|
|
220
|
+
break
|
|
221
|
+
yield chunk
|
|
222
|
+
|
|
223
|
+
def stream_to_file(self, file_name):
|
|
224
|
+
with open(file_name, 'wb') as f:
|
|
225
|
+
f.write(self.byte_stream.getvalue())
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
if cache_entry['audio_file'][0] == 'old':
|
|
229
|
+
return {'out' : Stream(cache_entry['audio_file'][1]),
|
|
230
|
+
'run_time' : cache_entry['run_time'] }
|
|
231
|
+
|
|
232
|
+
elif cache_entry['audio_file'][0] == 'new':
|
|
233
|
+
# Create a class that we can use as a context manager to return the Stream
|
|
234
|
+
# object
|
|
235
|
+
class AudioFile:
|
|
236
|
+
def __enter__(self):
|
|
237
|
+
return Stream(cache_entry['audio_file'][1])
|
|
238
|
+
|
|
239
|
+
def __exit__(self, exc_type, exc_value, traceback):
|
|
240
|
+
pass
|
|
241
|
+
|
|
242
|
+
def __call__(self):
|
|
243
|
+
return self
|
|
244
|
+
|
|
245
|
+
return {'out' : AudioFile(),
|
|
246
|
+
'run_time' : cache_entry['run_time']}
|
|
247
|
+
|
|
248
|
+
# If we reached this point, we don't have an audio file - return
|
|
249
|
+
return {'out' : out,
|
|
250
|
+
'run_time' : cache_entry['run_time']}
|
|
251
|
+
else:
|
|
252
|
+
if self._verbose:
|
|
253
|
+
print('No saved result found')
|
|
254
|
+
|
|
255
|
+
return None
|
|
256
|
+
|
|
257
|
+
def modify_cache(self, key, value):
|
|
258
|
+
'''
|
|
259
|
+
This function writes a specific key and value to the cache and the temporary cache
|
|
260
|
+
file, and records the fact the key has been used
|
|
261
|
+
'''
|
|
262
|
+
|
|
263
|
+
# Record in the cache
|
|
264
|
+
self._cache[key] = value
|
|
265
|
+
|
|
266
|
+
# Save the value to the temporary cache file
|
|
267
|
+
with open(self._temp_cache_file, 'ab') as f:
|
|
268
|
+
# Get the entry
|
|
269
|
+
entry = pickle.dumps([key, value])
|
|
270
|
+
|
|
271
|
+
# Write its length to the file
|
|
272
|
+
f.write(struct.pack('I', len(entry)))
|
|
273
|
+
|
|
274
|
+
# Then, write the entry
|
|
275
|
+
f.write(entry)
|
|
276
|
+
|
|
277
|
+
# Record the fact the key has been used
|
|
278
|
+
with open(self._used_keys_file, 'a') as f:
|
|
279
|
+
f.write(key + '\n')
|
|
280
|
+
|
|
281
|
+
def write_to_cache(self, kwargs, out, run_time):
|
|
282
|
+
'''
|
|
283
|
+
If we are in dev mode, this function will write the result of the function
|
|
284
|
+
described in self._stem called with parameters kwargs to the cache.
|
|
285
|
+
|
|
286
|
+
It also adds the result ot the temporary cache file
|
|
287
|
+
'''
|
|
288
|
+
|
|
289
|
+
if self._dev_mode:
|
|
290
|
+
if self._verbose:
|
|
291
|
+
print('Saving result to the cache')
|
|
292
|
+
|
|
293
|
+
# If we asked for the raw response from the OpenAI API, get the parsed response - we
|
|
294
|
+
# do NOT want to save the raw response to the cache, because it contains things like
|
|
295
|
+
# the number of tokens remaining, which won't be relevant/valid when the value is
|
|
296
|
+
# pulled from the ache
|
|
297
|
+
if 'with_raw_response' in self._stem:
|
|
298
|
+
out = out.parse()
|
|
299
|
+
|
|
300
|
+
# Prepare the output object
|
|
301
|
+
out_obj = {'out':out, 'time_saved':time.time(), 'run_time':run_time}
|
|
302
|
+
|
|
303
|
+
# Check whether this is a request throught the image API - if so, we need to check
|
|
304
|
+
# whether URLs were returned; if they were, we should save them
|
|
305
|
+
if 'images' in self._stem:
|
|
306
|
+
saved_images = []
|
|
307
|
+
for im in out.data:
|
|
308
|
+
if im.url:
|
|
309
|
+
saved_images.append(requests.get(im.url).content)
|
|
310
|
+
else:
|
|
311
|
+
saved_images.append(None)
|
|
312
|
+
out_obj['saved_images'] = saved_images
|
|
313
|
+
|
|
314
|
+
# Check whether this is a request throught he audio API - if so, we need to download
|
|
315
|
+
# the resulting file, and save it. Unfortunately, there are two ways this API might
|
|
316
|
+
# be called - the legacy way (client.audio.speech.create) and the new way (client.
|
|
317
|
+
# audio.speech.with_streaming_response.create). They each require different ways to
|
|
318
|
+
# download the file
|
|
319
|
+
if type(out) == openai._legacy_response.HttpxBinaryResponseContent:
|
|
320
|
+
# The user used the legacy format; get the file in base64 format
|
|
321
|
+
audio_data = io.BytesIO()
|
|
322
|
+
for chunk in out.iter_bytes():
|
|
323
|
+
audio_data.write(chunk)
|
|
324
|
+
audio_data.seek(0)
|
|
325
|
+
audio_data = audio_data.read()
|
|
326
|
+
audio_data = base64.b64encode(audio_data).decode('utf-8')
|
|
327
|
+
|
|
328
|
+
out_obj['audio_file'] = ('old', audio_data)
|
|
329
|
+
|
|
330
|
+
if type(out) == openai._response.ResponseContextManager:
|
|
331
|
+
# The user used the new format; get the file in base64 format
|
|
332
|
+
audio_data = io.BytesIO()
|
|
333
|
+
with out as _out:
|
|
334
|
+
for chunk in _out.iter_bytes():
|
|
335
|
+
audio_data.write(chunk)
|
|
336
|
+
audio_data.seek(0)
|
|
337
|
+
audio_data = audio_data.read()
|
|
338
|
+
audio_data = base64.b64encode(audio_data).decode('utf-8')
|
|
339
|
+
|
|
340
|
+
out_obj['audio_file'] = ('new', audio_data)
|
|
341
|
+
|
|
342
|
+
# First, save the entry as provided
|
|
343
|
+
seeded_key = self.get_cache_key(kwargs, hash_key=False)
|
|
344
|
+
|
|
345
|
+
if 'seed' in kwargs:
|
|
346
|
+
# If this call includes a seed, we just want to overwrite whatever already exists in
|
|
347
|
+
# the cache at that position
|
|
348
|
+
self.modify_cache(seeded_key, [out_obj])
|
|
349
|
+
|
|
350
|
+
# Now, strip the seed, and look at the corresponding entry - if a pointer to this
|
|
351
|
+
# seeded entry doesn't yet exist there, add it
|
|
352
|
+
stripped_key = self.get_cache_key(kwargs, strip_seed=True, hash_key=False)
|
|
353
|
+
|
|
354
|
+
current_pointers = [i['TARGET'] for i in self._cache.get(stripped_key, []) if 'TARGET' in i]
|
|
355
|
+
|
|
356
|
+
if seeded_key not in current_pointers:
|
|
357
|
+
self.modify_cache(stripped_key, self._cache.get(stripped_key, []) + [{'TARGET':seeded_key}])
|
|
358
|
+
else:
|
|
359
|
+
# This isn't a seeded request. If we already have an entry there, append this request
|
|
360
|
+
# to the list
|
|
361
|
+
self.modify_cache(seeded_key, self._cache.get(seeded_key, []) + [out_obj])
|
|
362
|
+
|
|
363
|
+
def __call__(self, **kwargs):
|
|
364
|
+
'''
|
|
365
|
+
This function is called whenever an OpenAI function is called
|
|
366
|
+
'''
|
|
367
|
+
|
|
368
|
+
# Try and read the value from the cache
|
|
369
|
+
out = self.read_from_cache(kwargs)
|
|
370
|
+
|
|
371
|
+
if out is not None:
|
|
372
|
+
# We were able to pull a value from the cache; return either the value, or an async
|
|
373
|
+
# funcion that returns it. Pause if needed.
|
|
374
|
+
|
|
375
|
+
if self._is_async:
|
|
376
|
+
async def async_func():
|
|
377
|
+
if self._delay_responses:
|
|
378
|
+
await asyncio.sleep(out['run_time'])
|
|
379
|
+
return out['out']
|
|
380
|
+
return async_func()
|
|
381
|
+
|
|
382
|
+
else:
|
|
383
|
+
if self._delay_responses:
|
|
384
|
+
time.sleep(out['run_time'])
|
|
385
|
+
return out['out']
|
|
386
|
+
|
|
387
|
+
# If we reached this point, we need to query OpenAI. Make sure we have an OpenAI key
|
|
388
|
+
if self._api_key is None:
|
|
389
|
+
raise ValueError('Your request is not available in the cache, and you did not provide '
|
|
390
|
+
"an OpenAI API key, so I can't query OpenAI for you.")
|
|
391
|
+
|
|
392
|
+
# Create a "real" openai.OpenAI client object (sync or async as needed)
|
|
393
|
+
if self._is_async:
|
|
394
|
+
rel_func = openai.AsyncOpenAI(api_key=self._api_key)
|
|
395
|
+
else:
|
|
396
|
+
rel_func = openai.OpenAI(api_key=self._api_key)
|
|
397
|
+
|
|
398
|
+
# Go down the stem tree to find the relevant function
|
|
399
|
+
for attr in self._stem:
|
|
400
|
+
rel_func = getattr(rel_func, attr)
|
|
401
|
+
|
|
402
|
+
# If the function was called with a seed but the OpenAI function does not accept one,
|
|
403
|
+
# strip it before calling
|
|
404
|
+
kwargs_copy = {i:j for i, j in kwargs.items()}
|
|
405
|
+
if 'seed' not in inspect.signature(rel_func).parameters:
|
|
406
|
+
if 'seed' in kwargs_copy:
|
|
407
|
+
if self._verbose:
|
|
408
|
+
print('Detected a seed parameter in an OpenAPI call that does not accept a seed. '
|
|
409
|
+
"I'll strip the parameter from the call before sending it to OpenAI, but "
|
|
410
|
+
"save it in the cached response. See the user manual (section 'repeated "
|
|
411
|
+
"requests') for details" )
|
|
412
|
+
kwargs_copy = {i:j for i, j in kwargs.items() if i != 'seed'}
|
|
413
|
+
|
|
414
|
+
|
|
415
|
+
# Call it, write the result to the cache, and return either the value or the co-routine
|
|
416
|
+
# if we are in async mode
|
|
417
|
+
if self._is_async:
|
|
418
|
+
async def async_func():
|
|
419
|
+
start_time = time.time()
|
|
420
|
+
out = await rel_func(**kwargs_copy)
|
|
421
|
+
self.write_to_cache(kwargs, out, time.time() - start_time)
|
|
422
|
+
return out
|
|
423
|
+
|
|
424
|
+
return async_func()
|
|
425
|
+
|
|
426
|
+
else:
|
|
427
|
+
start_time = time.time()
|
|
428
|
+
out = rel_func(**kwargs_copy)
|
|
429
|
+
self.write_to_cache(kwargs, out, time.time() - start_time)
|
|
430
|
+
|
|
431
|
+
return out
|
cached_openai/main.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
# File utilities
|
|
2
|
+
import os
|
|
3
|
+
from . import utils
|
|
4
|
+
from . import cached_client
|
|
5
|
+
from . import materialize_utils
|
|
6
|
+
import datetime
|
|
7
|
+
|
|
8
|
+
######################
|
|
9
|
+
# Define constants #
|
|
10
|
+
######################
|
|
11
|
+
|
|
12
|
+
# Specify the name of the permanent cache file. We will first look for this file
# in the working directory; if it exists there, we load it from there. If it
# doesn't, we look for it in the package directory
CACHE_FILE_NAME = 'openai.cache'

# Determine whether we want to print debugging information as we run the package;
# this is enabled if the 'CACHED_OPENAI_VERBOSE' variable is set in the
# environment. Only the presence of the variable is checked, not its value
VERBOSE = 'CACHED_OPENAI_VERBOSE' in os.environ

# In dev mode, all calls to the OpenAI API will be cached in the temporary cache
# file, and every time a request is made (either new or cached), we will track
# the fact the request has been made in a used keys file. The latter allows a
# "cleaning operation" that only keeps data for requests that have been made
# since this file was created.
#
# Dev mode is turned on if the 'CACHED_OPENAI_DEV_MODE' variable is set in the
# environment. Only the presence of the variable is checked, not its value
DEV_MODE = 'CACHED_OPENAI_DEV_MODE' in os.environ
if VERBOSE:
    print(f'DEV_MODE is set to {DEV_MODE}')

# The name of the temporary cache and used keys files which will be written to in
# dev mode. These will be in the code's working directory
TEMP_CACHE_FILE_NAME = 'openai_cache_temp.bin'
USED_KEYS_FILE = 'openai_cache_used.txt'

# When we recover values from the cache, we can either return the values instantly
# or delay for as long as it took to originally get a response from the OpenAI
# API. By default, no pausing will happen, unless a 'CACHED_OPENAI_DELAY_RESPONSES'
# variable is set in the environment
DELAY_RESPONSES_NEW = 'CACHED_OPENAI_DELAY_RESPONSES' in os.environ
if VERBOSE:
    print(f'DELAY_RESPONSES is set to {DELAY_RESPONSES_NEW}')

###########
#  Setup  #
###########

# Load the cache. This runs once, when the module is first imported.
# NOTE(review): get_cache also returns the delay_responses value that is used from here
# on; how it is derived from DELAY_RESPONSES_NEW is not visible here - confirm in utils
delay_responses, cache = utils.get_cache(CACHE_FILE_NAME, TEMP_CACHE_FILE_NAME, DEV_MODE, DELAY_RESPONSES_NEW, VERBOSE)
|
|
53
|
+
|
|
54
|
+
#################################################
|
|
55
|
+
# Create the main entrypoints for the package #
|
|
56
|
+
#################################################
|
|
57
|
+
|
|
58
|
+
def OpenAI(api_key : str | None = None):
    '''
    Return a synchronous CachedClient that is backed by the module-level cache and
    configured from the module-level settings. api_key is passed through unchanged.
    '''

    client_settings = {'cache'           : cache,
                       'verbose'         : VERBOSE,
                       'dev_mode'        : DEV_MODE,
                       'is_async'        : False,
                       'delay_responses' : delay_responses,
                       'temp_cache_file' : TEMP_CACHE_FILE_NAME,
                       'used_keys_file'  : USED_KEYS_FILE}

    return cached_client.CachedClient(api_key, **client_settings)
|
|
67
|
+
|
|
68
|
+
def AsyncOpenAI(api_key : str | None = None):
    '''
    Return an asynchronous CachedClient that is backed by the module-level cache and
    configured from the module-level settings. api_key is passed through unchanged.
    '''

    client_settings = {'cache'           : cache,
                       'verbose'         : VERBOSE,
                       'dev_mode'        : DEV_MODE,
                       'is_async'        : True,
                       'delay_responses' : delay_responses,
                       'temp_cache_file' : TEMP_CACHE_FILE_NAME,
                       'used_keys_file'  : USED_KEYS_FILE}

    return cached_client.CachedClient(api_key, **client_settings)
|
|
77
|
+
|
|
78
|
+
##########################################
|
|
79
|
+
# Create the materialization functions #
|
|
80
|
+
##########################################
|
|
81
|
+
|
|
82
|
+
def materialize(self_contained : bool,
                compress       : bool,
                hash_keys      : bool,
                used_keys_only : bool):
    '''
    This function materializes the cache and prepares it for distribution. It accepts
    the following options
      - self_contained : if True, the function will create a file called
        cached_openai_{date}.py in the current working directory which will contain
        the materialized cache. This file will be a self-contained .py file containing
        a serialized version of the cache. Just import that package, and you're good
        to go with the full cache.

        If False, the function will create a file named openai_{date}.cache
        containing the cache only. The file can be distributed in three ways
          - Rename it to openai.cache, and ask your target audience to place the file
            in their working directory. They can then install cached_openai through
            pypi, and when they load the package, it will find the file in the working
            directory and load it
          - Upload it to some URL. Then, get your audience to install cached_openai
            from pypi. When they load the package, it will prompt them for the URL,
            from which the file will automatically be downloaded
          - Fork cached_openai, rename the file to openai.cache, and include it in
            the src directory of the package. Publish it. When your target audience
            downloads the package, the cache will be read from this file.

      - compress : True if the cache should be compressed before saving, or False
        otherwise. Note that compressing means the package will take longer to run
        every time it is loaded

      - hash_keys : whether keys should be hashed; if True, the dictionary keys will be hashed
        and the pickle file will be smaller. On the other hand, the cache will then become
        *final* - it will be impossible to add to it. If you choose to go the hashed direction,
        it is recommended you first save the cache without hashing the keys so that you can
        later add to it if you like

      - used_keys_only : if True, the cache will only contain the keys that have been
        created or accessed since the last time the openai_cache_used.txt file was created.

        This is useful if you fill the cache with a whole bunch of data while you test
        your code, and then want to create a "clean" version to distribute. Just delete the
        openai_cache_used.txt file, run your clean code, and then run this with used_keys_only
        = True.
    '''

    date_stamp = datetime.datetime.now().strftime('%Y-%m-%d')

    # Read the list of used keys, one per line, if we were asked to restrict to them
    used_keys = None
    if used_keys_only:
        with open(USED_KEYS_FILE, 'r') as f:
            used_keys = f.read().split('\n')

    # Arguments common to both ways of materializing the cache
    shared_args = {'cache'           : cache,
                   'delay_responses' : delay_responses,
                   'compress'        : compress,
                   'hash_keys'       : hash_keys,
                   'used_keys'       : used_keys}

    if self_contained:
        materialize_utils.create_self_contained(file_name = f'cached_openai_{date_stamp}.py',
                                                **shared_args)
        return

    # Insert the date between the cache file's base name and its extension
    name_parts = CACHE_FILE_NAME.split('.')
    dated_file_name = name_parts[0] + f'_{date_stamp}.' + name_parts[-1]

    materialize_utils.materialize_cache(file_name = dated_file_name, **shared_args)
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
################################
|
|
2
|
+
# Materialization utilities #
|
|
3
|
+
################################
|
|
4
|
+
|
|
5
|
+
import gzip
|
|
6
|
+
import pickle
|
|
7
|
+
import base64
|
|
8
|
+
import hashlib
|
|
9
|
+
import importlib
|
|
10
|
+
|
|
11
|
+
# This file contains utilities to materialize the cache into a form that
|
|
12
|
+
# can be distributed
|
|
13
|
+
|
|
14
|
+
def materialize_cache(cache : dict,
|
|
15
|
+
delay_responses : bool,
|
|
16
|
+
compress : bool,
|
|
17
|
+
hash_keys : bool,
|
|
18
|
+
file_name : str | None,
|
|
19
|
+
used_keys : list | None = None) -> str | None:
|
|
20
|
+
'''
|
|
21
|
+
This function materializes the cache into a pickle file for distribution. It accepts
|
|
22
|
+
the following arguments:
|
|
23
|
+
- cache : the cache to be saved
|
|
24
|
+
- compress : whether the cache should be compressed
|
|
25
|
+
- hash_keys : whether keys should be hashed; if yes, the dictionary keys will be hashed
|
|
26
|
+
and the pickle file will be smaller. On the other hand, the cache will then
|
|
27
|
+
become *final* - it will be impossible to add to it. If you choose to go
|
|
28
|
+
the hashed direction, it is recommended you first save the cache without
|
|
29
|
+
hashing the keys so that you can later add to it if you like
|
|
30
|
+
- file_name : the file name to be used. If no filename is provided, a base64 encoded
|
|
31
|
+
string is returned
|
|
32
|
+
- used_keys : a list of keys that should be included in the cache; any keys are that are
|
|
33
|
+
not included in this list are discarded
|
|
34
|
+
'''
|
|
35
|
+
|
|
36
|
+
# If we want used keys only, filter down the cache
|
|
37
|
+
if used_keys:
|
|
38
|
+
cache = {i:j for i, j in cache.items() if i in used_keys}
|
|
39
|
+
|
|
40
|
+
# If we want to hash the keys, go ahead and do it
|
|
41
|
+
if hash_keys:
|
|
42
|
+
cache = {hashlib.md5(i.encode('utf-8')).hexdigest():j
|
|
43
|
+
for i, j in cache.items()}
|
|
44
|
+
|
|
45
|
+
# Make all pointers point to hashed values
|
|
46
|
+
for key, vals in cache.items():
|
|
47
|
+
for val in vals:
|
|
48
|
+
if 'TARGET' in val:
|
|
49
|
+
val['TARGET'] = hashlib.md5(val['TARGET'].encode('utf-8')).hexdigest()
|
|
50
|
+
|
|
51
|
+
# Pickle the dictionary
|
|
52
|
+
pickled_data = pickle.dumps((delay_responses, cache))
|
|
53
|
+
|
|
54
|
+
# Compress it if needed
|
|
55
|
+
if compress:
|
|
56
|
+
pickled_data = gzip.compress(pickled_data)
|
|
57
|
+
|
|
58
|
+
# If the filename is provided, save it there; otherwise, return a b64 encoded
|
|
59
|
+
# string
|
|
60
|
+
if file_name:
|
|
61
|
+
with open(file_name, 'wb') as f:
|
|
62
|
+
f.write(pickled_data)
|
|
63
|
+
|
|
64
|
+
else:
|
|
65
|
+
return base64.b64encode(pickled_data).decode('utf-8')
|
|
66
|
+
|
|
67
|
+
def create_self_contained(cache           : dict,
                          delay_responses : bool,
                          compress        : bool,
                          hash_keys       : bool,
                          file_name       : str | None,
                          used_keys       : list | None = None) -> str | None:
    '''
    Writes a self-contained .py file to file_name that includes the cache inside of it,
    followed by the source of cached_client.py and the OpenAI and AsyncOpenAI entrypoints.

    See materialize_cache for an explanation of the arguments
    '''

    # "import importlib" on its own does not guarantee that the resources submodule is
    # loaded, so import it explicitly here
    import importlib.resources

    # Create a placeholder for the output code
    out_code = []

    # Get the cache as a base 64 encoded string; do not include the file name to
    # ensure nothing is saved
    b64_cache = materialize_cache(cache, delay_responses, compress, hash_keys, None, used_keys)

    # Ensure only the OpenAI and AsyncOpenAI functions are exposed
    out_code.append("__all__ = ['OpenAI', 'AsyncOpenAI']")

    # Add the cache and the code to decompress it to the self-contained file
    out_code.append('import base64')
    out_code.append('import pickle')
    out_code.append(f'cache = base64.b64decode("{b64_cache}")')

    if compress:
        out_code.append('import gzip')
        out_code.append('cache = gzip.decompress(cache)')

    # NOTE: the generated file unpickles its embedded cache when it is imported, so it
    # should only ever be distributed from, and loaded from, a trusted source
    out_code.append('DELAY_RESPONSES, cache = pickle.loads(cache)')

    # Load the cached_client.py file. Reading through the resource object (rather than
    # passing it to open) does not rely on the resource being a real file on disk
    client_source = (importlib.resources.files(__package__)
                                        .joinpath('cached_client.py')
                                        .read_text(encoding='utf-8'))
    out_code.extend(client_source.split('\n'))

    # Create the main entrypoints
    entrypoint_args = ( ' api_key ,'
                        'cache = cache ,'
                        'verbose = False ,'
                        'dev_mode = False ,'
                        'delay_responses = DELAY_RESPONSES ,'
                        'temp_cache_file = None ,'
                        'used_keys_file = None ')

    out_code.append(f'def OpenAI     (api_key : str | None = None) : return CachedClient({entrypoint_args}, is_async = False)')
    out_code.append(f'def AsyncOpenAI(api_key : str | None = None) : return CachedClient({entrypoint_args}, is_async = True)' )

    # Save the result. The encoding is given explicitly so that the output does not
    # depend on the platform's default
    with open(file_name, 'w', encoding='utf-8') as f:
        f.write('\n'.join(out_code))
|
cached_openai/utils.py
ADDED
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
import gzip
|
|
3
|
+
import tqdm
|
|
4
|
+
import os
|
|
5
|
+
import importlib.resources
|
|
6
|
+
import pickle
|
|
7
|
+
import struct
|
|
8
|
+
|
|
9
|
+
def download_cache(cache_url : str, target_file : str) -> None:
    '''
    Download a cache file from cache_url and save it to target_file, displaying a
    progress bar while the download is running.

    If cache_url does not start with http:// or https://, https:// is prepended to it.
    If cache_url ends with .gz, the stream is gunzipped as it downloads, so target_file
    ends up containing the decompressed data.

    Arguments
      - cache_url   : the URL to download the cache file from
      - target_file : the path of the file the downloaded data is written to

    Raises whatever response.raise_for_status() raises if the request was unsuccessful;
    errors from gzip or from writing the file are propagated unchanged.
    '''

    # If the URL is incomplete, prepend https://
    if not cache_url.startswith(('http://', 'https://')):
        cache_url = 'https://' + cache_url

    # Check whether the file is a .gz file
    is_gz = cache_url.endswith('.gz')

    # Start the request
    with requests.get(cache_url, stream=True) as response:
        # Raise an error if the request was unsuccessful
        response.raise_for_status()

        # Get the total size (0 if the server did not send a content-length header)
        total_size = int(response.headers.get('content-length', 0))

        # Use the progress bar as a context manager so that it is closed even if the
        # download, the decompression or the file write fails part-way through
        with tqdm.tqdm(total=total_size, unit='B', unit_scale=True, desc='Downloading cache file') as progress_bar:

            if is_gz:
                # Monkey patch the read method of the raw stream, so that the progress bar
                # is advanced by the number of *compressed* bytes pulled off the wire (which
                # is what content-length measures), not by the decompressed size
                original_read = response.raw.read

                def new_read(chunk_size=-1):
                    data = original_read(chunk_size)
                    if data:
                        progress_bar.update(len(data))
                    return data

                response.raw.read = new_read

                # Decompress on the fly, writing the result in 8 KiB chunks
                with gzip.GzipFile(fileobj=response.raw, mode='rb') as gz_in, open(target_file, 'wb') as f:
                    while True:
                        data = gz_in.read(8192)
                        if not data:
                            break
                        f.write(data)

            else:
                # Plain download; copy the body to disk in 8 KiB chunks
                with open(target_file, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)
                            progress_bar.update(len(chunk))
|
|
57
|
+
|
|
58
|
+
def load_cache_file(cache_file : bytes) -> dict:
    '''
    This function accepts a bytes string containing a pickled object; it first checks
    whether it can be loaded uncompressed, and if not, it tries to load it as a
    gzip-compressed pickle.

    It returns the object from the pickle (get_cache in this file unpacks that object
    as a (delay_responses, cache) pair).

    Raises a ValueError if the data can be loaded neither way; the underlying error
    from the compressed attempt is attached as the cause.

    NOTE: unpickling can execute arbitrary code, and this function may be handed the
    contents of a file downloaded from a user-supplied URL. Only load cache files that
    come from a source you trust.
    '''

    # First attempt: the bytes are a plain (uncompressed) pickle. Catch Exception rather
    # than using a bare except, so that KeyboardInterrupt / SystemExit are not swallowed
    try:
        return pickle.loads(cache_file)
    except Exception:
        pass

    # Second attempt: the bytes are a gzip-compressed pickle
    try:
        return pickle.loads(gzip.decompress(cache_file))
    except Exception as err:
        raise ValueError('Invalid cache file provided') from err
|
|
74
|
+
|
|
75
|
+
def get_cache(cache_file_name      : str  ,
              temp_cache_file_name : str  ,
              dev_mode             : bool ,
              delay_responses_new  : bool ,
              verbose              : bool = False ) -> tuple[bool, dict]:
    '''
    This function attempts to load the cache from disk, in the following order of priority
      - First, we look in the working directory
      - Then, we look in the directory containing the package
      - Finally, two options
          * If we are in dev mode, we create a blank cache and save it in the working
            directory with the name cache_file_name
          * Otherwise, we prompt the user for a URL to download the cache from
            (compressed or not), and save it in the working directory with the name
            cache_file_name

    If we are in dev_mode, additionally carry out the following tasks
      - Try and load the temporary cache file, if it exists, and update the main cache
        with it; the main cache is then re-saved and the temporary file is deleted
      - If the delay_responses parameter has changed (i.e., if delay_responses in the cache
        file is not equal to delay_responses_new), it is overwritten in the cache file

    Arguments
      - cache_file_name      : the name of the main cache file
      - temp_cache_file_name : the name of the temporary cache file (only read in dev mode)
      - dev_mode             : whether we are running in dev mode
      - delay_responses_new  : the delay_responses value to use (and save) in dev mode
      - verbose              : whether to print progress messages

    It returns a tuple with two entries
      - The value of delay_responses to use (delay_responses_new in dev mode, the value
        stored in the cache file otherwise)
      - The cache
    '''

    # Load the existing cache
    # -----------------------

    # Get the location of the package in case we need it later
    package_location = importlib.resources.files(__package__)
    package_cache    = package_location.joinpath(cache_file_name)

    # Determine the location of the cache file
    if os.path.exists(cache_file_name):
        # First, look in the working directory
        cache_loc = cache_file_name

        if verbose:
            print('Cache file found in the working directory.')

    elif os.path.exists(package_cache):
        # If there is no cache file in the working directory, look in the package
        # directory (this must use the cache_file_name argument - there is no
        # module-level CACHE_FILE_NAME constant in this file)
        cache_loc = package_cache

        if verbose:
            print('Cache file found in the package directory.')

    else:
        # If we have neither and we are in dev mode, start with an empty cache; if not
        # ask the user for a URL to download a cache file from, and download it to the
        # working directory
        if dev_mode:
            print('No cache file was found; starting with an empty cache. It will be saved in '
                                f'your working directory, with the name {cache_file_name}.')
            cache_loc = cache_file_name
            with open(cache_loc, 'wb') as f:
                pickle.dump((delay_responses_new, {}), f)

        else:
            cache_url = input('No cache file found; please enter a URL to download a cache file from\n')

            try:
                download_cache(cache_url, cache_file_name)
            except Exception:
                print('Failed to download cache file')
                raise

            cache_loc = cache_file_name
            print('Cache file downloaded successfully.')

    # Finally, read the cache file
    with open(cache_loc, 'rb') as f:
        data = f.read()
    delay_responses, cache = load_cache_file(data)

    # Update with the temporary cache file, or the new delay_responses value
    # ----------------------------------------------------------------------

    # If we are in dev mode, load any temporary cache file, and update the cache with it; then,
    # re-save the updated cache
    if dev_mode:
        if os.path.exists(temp_cache_file_name):
            if verbose:
                print('Temporary cache file found; using it to update the cache.')

            # Load the temporary cache file; it is a sequence of records, each made of a
            # 4-byte length (struct format 'I') followed by that many bytes of pickled
            # (key, value) pair
            with open(temp_cache_file_name, 'rb') as f:
                while True:
                    # Read the length of the next entry; break if we've reached the end of
                    # the file
                    length_data = f.read(4)
                    if not length_data:
                        break

                    # An incomplete length header or an incomplete record means the last
                    # write was cut short; keep everything read so far and stop, rather
                    # than failing on every subsequent load
                    if len(length_data) < 4:
                        print('Temporary cache file ends with an incomplete entry; ignoring it.')
                        break
                    length = struct.unpack('I', length_data)[0]

                    entry = f.read(length)
                    if len(entry) < length:
                        print('Temporary cache file ends with an incomplete entry; ignoring it.')
                        break

                    key, value = pickle.loads(entry)

                    cache[key] = value

            # Re-save the cache (note that in dev mode, we always save it uncompressed)
            # Use the new delay_responses value
            with open(cache_loc, 'wb') as f:
                pickle.dump((delay_responses_new, cache), f)

            # Delete the temporary cache file
            os.remove(temp_cache_file_name)

            if verbose:
                print('Cache updated and re-saved. Temporary cache file deleted')

        elif delay_responses_new != delay_responses:
            with open(cache_loc, 'wb') as f:
                pickle.dump((delay_responses_new, cache), f)

            if verbose:
                print('delay_responses value was changed - cache re-saved')

        # Set the value of delay_responses to the new value
        delay_responses = delay_responses_new

    # Return
    # ------
    return delay_responses, cache
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cached_openai
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A version of the OpenAI package that allows the caching of responses
|
|
5
|
+
Project-URL: Homepage, https://github.com/danguetta/cached_openai
|
|
6
|
+
Project-URL: Issues, https://github.com/danguetta/cached_openai/issues
|
|
7
|
+
Author-email: Daniel Guetta <daniel@guetta.com>
|
|
8
|
+
License-Expression: MIT
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Requires-Python: >=3.8
|
|
13
|
+
Requires-Dist: openai>=1.66.2
|
|
14
|
+
Requires-Dist: requests>=2.32.2
|
|
15
|
+
Requires-Dist: tqdm>=4.67.1
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
# cached_openai
|
|
19
|
+
|
|
20
|
+
`cached_openai` is a simple Python library that mimics the `OpenAI` python library, but can draw from a cache instead of sending the request to Open AI.
|
|
21
|
+
|
|
22
|
+
When it is run in **dev mode**, it caches responses to all requests made from OpenAI, and returns the cached value if the request is made again. When it is run in **production mode**, no new cache entries are created, but if any requests stored in the cache are made again, they are returned therefrom.
|
|
23
|
+
|
|
24
|
+
It is able to cache several different responses for a single request, to mimic the way the OpenAI API returns different responses when it receives the same query twice. It is also able to handle requests that return images or sound clips.
|
|
25
|
+
|
|
26
|
+
The user experience with `cached_openai` is identical to that with the original `openai` package. Consider, for example, the following piece of code using the "regular" `openai` api:
|
|
27
|
+
```
|
|
28
|
+
import openai
|
|
29
|
+
client = openai.OpenAI(api_key=...)
|
|
30
|
+
response = client.chat.completions.create(...)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
In `cached_openai`, the **only** change required would be to change the first line to `import cached_openai as openai`. That's it - everything else works identically. If the query is cached, the cached version will be returned. If not, the `OpenAI` API will be queried as usual.
|
|
34
|
+
|
|
35
|
+
The package can be configured to return the cached entry immediately, *or* to replicate the delay that would occur if the request were to be made from OpenAI directly.
|
|
36
|
+
|
|
37
|
+
The package also works with the async version of the OpenAI API - just use `openai.AsyncOpenAI(...)`.
|
|
38
|
+
|
|
39
|
+
## Why `cached_openai`?
|
|
40
|
+
|
|
41
|
+
I designed this package for teaching purposes - when I teach classes that use the OpenAI API, I often provide code to students which they run on their machines before modifying it. This isn't ideal for two reasons
|
|
42
|
+
1. Every student has to pay to run the API requests in the code - there can be hundreds or thousands of these requests, and the costs can add up.
|
|
43
|
+
2. Every student is likely to get different answers out of the API - it makes it difficult to teach the class when everyone is looking at a different answer.
|
|
44
|
+
|
|
45
|
+
Instead, I can now distribute `cached_openai` with a cache I prepared - students can then run the entire code for free, and get the same answers as I did. They can then modify the code to run their own version, and query OpenAI as usual.
|
|
46
|
+
|
|
47
|
+
# User manual
|
|
48
|
+
|
|
49
|
+
This user manual is divided into two parts - preparing a cache, and distributing it.
|
|
50
|
+
|
|
51
|
+
## Preparing a cache
|
|
52
|
+
|
|
53
|
+
To prepare a cache, you need to run `cached_openai` in **dev mode**. To do this, set an environment variable called `CACHED_OPENAI_DEV_MODE` to any value before you load the package. I also recommend you set an environment variable called `CACHED_OPENAI_VERBOSE` to any value to print debugging messages as the package runs. The easiest way to do this is in your code, before you import the package
|
|
54
|
+
|
|
55
|
+
```
|
|
56
|
+
import os
|
|
57
|
+
os.environ['CACHED_OPENAI_VERBOSE'] = 'True'
|
|
58
|
+
os.environ['CACHED_OPENAI_DEV_MODE'] = 'True'
|
|
59
|
+
import cached_openai
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
You should then use `cached_openai` exactly as you would `OpenAI` - every request you make will be cached. In dev mode, the package creates three files in your working directory
|
|
63
|
+
- `openai_cache_temp.bin` is a temporary cache file - it is updated every time a call is made to the OpenAI API to store the results of that call. It can be updated very quickly (so as not to slow down the call), but stores data in an inefficient format.
|
|
64
|
+
- `openai_cache.cache` is the main cache file which contains all cached requests in an efficient format (a dictionary). Every time the package is loaded, the data in `openai_cache_temp.bin` is integrated into this permanent cache file, and the temporary file is deleted.
|
|
65
|
+
- `openai_cache_used.txt` stores all the keys of requests that have been made or saved since the file was last created; we'll explain the purpose of this file later.
|
|
66
|
+
|
|
67
|
+
### Determining delays
|
|
68
|
+
|
|
69
|
+
The package can be configured to return the cached entry immediately, *or* to replicate the delay that would occur if the request were to be made from OpenAI directly.
|
|
70
|
+
|
|
71
|
+
By default, the package will be configured to return the result immediately. If you would, instead, like it to replicate the delay that was initially observed when the request was made, simply set an environment variable called `CACHED_OPENAI_DELAY_RESPONSES` to any value before you load the package.
|
|
72
|
+
|
|
73
|
+
### Repeated requests
|
|
74
|
+
|
|
75
|
+
When you use `cached_openai` to run a request that has already been run, a new request will not be made to OpenAI - the *cached* result is returned instead, for free.
|
|
76
|
+
|
|
77
|
+
This does mean, however, that that result will be the same every time. What if you want to store several different responses to a single request? All you need to do is provide a `seed` parameter when you run that same request - if the function has been run with that identical seed before, that result is returned, but if not, the query is run again against the OpenAI API. If the underlying OpenAI API function has a seed parameter (eg: `chat.completions.create`), the seed is passed to that function, and if not (eg: `audio.speech.create`), it is stripped before it is sent.
|
|
78
|
+
|
|
79
|
+
When the request is next called *without* a seed, it will return each of the seeded responses in succession, which will make it look like it is returning different results every time, just like the original API. Here's an example that might clarify this, using chat completions:
|
|
80
|
+
|
|
81
|
+
- **Input**: please give me a random number. **Output**: 42
|
|
82
|
+
- *This is the first time the request has been made, so it is sent to the OpenAI API.*
|
|
83
|
+
- **Input**: please give me a random number (`seed=1`). **Output**: 25
|
|
84
|
+
- *This is the first time the request has been made with this seed, so it is sent to the OpenAI API.*
|
|
85
|
+
- **Input**: please give me a random number (`seed=2`). **Output**: 126
|
|
86
|
+
- *This is the first time the request has been made with this seed, so it is sent to the OpenAI API.*
|
|
87
|
+
- **Input**: please give me a random number (`seed=2`). **Output**: 126
|
|
88
|
+
- *This request has been made with this seed before, the cached result is returned*
|
|
89
|
+
- **Input**: please give me a random number. **Output**: 42
|
|
90
|
+
- *This request has been made before and is being called again for the first time; return the first saved response.*
|
|
91
|
+
- **Input**: please give me a random number. **Output**: 25
|
|
92
|
+
- *This request has been made before and is being called again for the second time; return the second saved response.*
|
|
93
|
+
- **Input**: please give me a random number. **Output**: 126
|
|
94
|
+
- *This request has been made before and is being called again for the third time; return the third saved response.*
|
|
95
|
+
- **Input**: please give me a random number. **Output**: 42
|
|
96
|
+
- *This request has been made before and is being called again for the fourth time; there is no fourth response stored, so loop back to the first stored answer.*
|
|
97
|
+
|
|
98
|
+
### Images
|
|
99
|
+
|
|
100
|
+
Image requests that return image URLs require specific handling, because OpenAI does not keep these image URLs active forever - they are deleted a few hours after the request is made. Thus, simply storing the original response with the original URL would return a broken link when the cached response was returned.
|
|
101
|
+
|
|
102
|
+
To solve this problem, `cached_openai` downloads the image from the URL before returning the result, and stores it in the cache. When the same request is later called and retrieved from the cache, the image is extracted into an `image` folder in the working directory, and the URLs in the response are replaced with local URLs pointing to those extracted images.
|
|
103
|
+
|
|
104
|
+
### Audio
|
|
105
|
+
|
|
106
|
+
Audio requests suffer from a similar problem - OpenAI returns a file stream which then needs to be downloaded. `cached_openai` handles this seamlessly by downloading the file at request time, saving it in the cache, and simulating a file stream when the request is called again and needs to be retrieved from the cache.
|
|
107
|
+
|
|
108
|
+
## Distributing the cache
|
|
109
|
+
|
|
110
|
+
Once you have created the cache for your class, there are three different ways to distribute it to students
|
|
111
|
+
- Get students to install `cached_openai` directly from pypi, and distribute a cache file that they can put in their working directory containing the cache data; you can also publish this file to a URL and `cached_openai` will ask them for a URL from which to download (and potentially decompress) the file.
|
|
112
|
+
- Create your own package on pypi that forks `cached_openai` and includes your own cache file in it (note that this only works if your cache file is small enough to be uploaded to pypi).
|
|
113
|
+
- Create a self-contained `.py` file that contains not only the `cached_openai` code, but also the cache itself, embedded in the file. You can then simply distribute this `.py` file as a "batteries included" package.
|
|
114
|
+
|
|
115
|
+
In all three cases, the first step is to call the `materialize` function, which will package the cache for you; see the function docstring for details.
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
cached_openai/__init__.py,sha256=DpcHHIIjEG1X5L1x6Mul3Lu4aPIWRHnEXz61SlOTR4w,336
|
|
2
|
+
cached_openai/cached_client.py,sha256=jD-L5d9OrDBK7--iUJiGY2wtXTfeECY00_Aq6WzpKzI,19597
|
|
3
|
+
cached_openai/main.py,sha256=2UJKySQRNVYD6bidfUvZNPIjWD7bV5CMOj8BwrKkINE,7835
|
|
4
|
+
cached_openai/materialize_utils.py,sha256=NJAihG7KfqoNcS2cpmeRMvPIQY-dR0HY6DRotAlhuEo,4923
|
|
5
|
+
cached_openai/utils.py,sha256=PzdvQsp_HD2fjUgl3tE49wZ07UAD-gsCIAkylsDJY-Q,7409
|
|
6
|
+
cached_openai-0.0.1.dist-info/METADATA,sha256=qjuKwAelc27wpbJjhPMSwkA7cXp_87upSXNrVfE9d84,9519
|
|
7
|
+
cached_openai-0.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
8
|
+
cached_openai-0.0.1.dist-info/licenses/LICENSE,sha256=_ANFpoG5LuiFv1tOVgx6DB05ngEHADp7aahMVcf_sWs,1060
|
|
9
|
+
cached_openai-0.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2025 Daniel Guetta
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|