bulk-chain 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bulk_chain/api.py +104 -61
- bulk_chain/core/llm_base.py +17 -43
- bulk_chain/core/service_asyncio.py +65 -0
- bulk_chain/core/service_batch.py +2 -2
- bulk_chain/core/utils.py +21 -2
- {bulk_chain-1.0.0.dist-info → bulk_chain-1.1.0.dist-info}/METADATA +28 -9
- bulk_chain-1.1.0.dist-info/RECORD +16 -0
- {bulk_chain-1.0.0.dist-info → bulk_chain-1.1.0.dist-info}/WHEEL +1 -1
- bulk_chain-1.0.0.dist-info/RECORD +0 -15
- {bulk_chain-1.0.0.dist-info → bulk_chain-1.1.0.dist-info}/LICENSE +0 -0
- {bulk_chain-1.0.0.dist-info → bulk_chain-1.1.0.dist-info}/top_level.txt +0 -0
bulk_chain/api.py
CHANGED
|
@@ -1,41 +1,35 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import collections
|
|
3
|
+
import logging
|
|
2
4
|
import os
|
|
3
5
|
from itertools import chain
|
|
4
6
|
|
|
5
7
|
from bulk_chain.core.llm_base import BaseLM
|
|
8
|
+
from bulk_chain.core.service_asyncio import AsyncioService
|
|
6
9
|
from bulk_chain.core.service_batch import BatchIterator
|
|
7
10
|
from bulk_chain.core.service_data import DataService
|
|
8
11
|
from bulk_chain.core.service_dict import DictionaryService
|
|
9
12
|
from bulk_chain.core.service_json import JsonService
|
|
10
13
|
from bulk_chain.core.service_schema import SchemaService
|
|
11
|
-
from bulk_chain.core.utils import
|
|
14
|
+
from bulk_chain.core.utils import attempt_wrapper
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
INFER_MODES = {
|
|
15
|
-
"
|
|
16
|
-
|
|
18
|
+
"single": lambda llm, batch, **kwargs: [llm.ask(prompt) for prompt in batch],
|
|
19
|
+
"single_stream": lambda llm, batch, **kwargs: [llm.ask_stream(prompt) for prompt in batch],
|
|
20
|
+
"batch": lambda llm, batch, **kwargs: llm.ask(batch),
|
|
21
|
+
"batch_async": lambda llm, batch, **kwargs: AsyncioService.run_tasks(
|
|
22
|
+
batch=batch, async_handler=llm.ask_async, event_loop=kwargs.get("event_loop")
|
|
23
|
+
),
|
|
24
|
+
"batch_stream_async": lambda llm, batch, **kwargs: AsyncioService.run_tasks(
|
|
25
|
+
batch=batch, async_handler=llm.ask_stream_async, event_loop=kwargs.get("event_loop")
|
|
26
|
+
),
|
|
17
27
|
}
|
|
18
28
|
|
|
19
29
|
|
|
20
30
|
CWD = os.getcwd()
|
|
21
31
|
|
|
22
32
|
|
|
23
|
-
def _iter_entry_content(entry, entry_info=None, **kwargs):
|
|
24
|
-
|
|
25
|
-
if isinstance(entry, str):
|
|
26
|
-
kwargs.get("callback_str_func", lambda *_: None)(entry, entry_info)
|
|
27
|
-
yield entry
|
|
28
|
-
elif isinstance(entry, collections.abc.Iterable):
|
|
29
|
-
h = kwargs.get("callback_stream_func", lambda *_: None)
|
|
30
|
-
h(None, entry_info | {"action": "start"})
|
|
31
|
-
for chunk in map(lambda item: str(item), entry):
|
|
32
|
-
yield chunk
|
|
33
|
-
h(chunk, entry_info)
|
|
34
|
-
h(None, entry_info | {"action": "end"})
|
|
35
|
-
else:
|
|
36
|
-
raise Exception(f"Non supported type `{type(entry)}` for handling output from batch")
|
|
37
|
-
|
|
38
|
-
|
|
39
33
|
def _iter_batch_prompts(c, batch_content_it, **kwargs):
|
|
40
34
|
for ind_in_batch, entry in enumerate(batch_content_it):
|
|
41
35
|
content = DataService.get_prompt_text(
|
|
@@ -45,15 +39,53 @@ def _iter_batch_prompts(c, batch_content_it, **kwargs):
|
|
|
45
39
|
yield ind_in_batch, content
|
|
46
40
|
|
|
47
41
|
|
|
48
|
-
def
|
|
42
|
+
def __handle_agen_to_gen(handle, batch, event_loop):
    """ Bridge a batch of asynchronous generators into one synchronous generator.

    `handle` is called as `handle(batch, event_loop=event_loop)`; its result is
    enumerated and every element is consumed with `async for`, so it is expected
    to be a sequence of async generators (one per batch entry).

    Yields `(ind_in_batch, chunk)` pairs, where `ind_in_batch` is the position of
    the producing generator in that sequence and `chunk` is converted with `str`.
    The pairs arrive in the order in which the merged stream delivers them.
    """

    async def _tag_with_position(position, source):
        # Attach the batch position to every produced item, so the origin of
        # an item is still known once all the streams are merged into one.
        async for piece in source:
            yield position, piece

    sources = handle(batch, event_loop=event_loop)
    tagged = [_tag_with_position(position, source) for position, source in enumerate(sources)]

    merged = AsyncioService.merge_generators(*tagged)
    sync_it = AsyncioService.async_gen_to_iter(gen=merged, loop=event_loop)

    for ind_in_batch, chunk in sync_it:
        yield ind_in_batch, str(chunk)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def __handle_gen(handle, batch, event_loop):
    """ Iterate the output of every individual element of the batch.

    `handle` is called as `handle(batch, event_loop=event_loop)` and its result
    is enumerated; each entry is either a `str` (yielded as a single chunk) or
    any other iterable (every item is converted with `str` and yielded).

    Yields `(ind_in_batch, chunk)` pairs, entry after entry.

    Raises `TypeError` (a subclass of `Exception`, so existing handlers that
    catch `Exception` keep working) for an entry that is neither a string
    nor an iterable.
    """

    def _iter_entry_content(entry):
        # `str` is checked first: a string is iterable too, but it has to be
        # passed through as one chunk rather than character by character.
        if isinstance(entry, str):
            yield entry
        elif isinstance(entry, collections.abc.Iterable):
            yield from map(str, entry)
        else:
            raise TypeError(f"Non supported type `{type(entry)}` for handling output from batch")

    for ind_in_batch, entry in enumerate(handle(batch, event_loop=event_loop)):
        for chunk in _iter_entry_content(entry=entry):
            yield ind_in_batch, chunk
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _iter_chunks(p_column, batch_content_it, **kwargs):
    """ Produce `(ind_in_batch, chunk)` pairs for the `p_column` prompts of a batch.

    Reads from `kwargs`:
      * `infer_mode` -- "batch_stream_async" selects the handler that converts
        async generators into a sync one; any other value selects the plain one;
      * `handle_batch_func` -- callable that is passed to the handler as `handle`;
      * `event_loop` -- forwarded to the selected handler.
    """
    if kwargs["infer_mode"] == "batch_stream_async":
        selected_handler = __handle_agen_to_gen
    else:
        selected_handler = __handle_gen

    # Gather the prompt text of every record of the batch.
    prompts = [record[p_column] for record in batch_content_it]

    pairs_it = selected_handler(handle=kwargs["handle_batch_func"],
                                batch=prompts,
                                event_loop=kwargs["event_loop"])

    for ind_in_batch, chunk in pairs_it:
        yield ind_in_batch, chunk
|
|
54
86
|
|
|
55
87
|
|
|
56
|
-
def _infer_batch(batch, schema, return_mode, cols=None, **kwargs):
|
|
88
|
+
def _infer_batch(batch, batch_ind, schema, return_mode, cols=None, **kwargs):
|
|
57
89
|
assert (isinstance(batch, list))
|
|
58
90
|
|
|
59
91
|
if len(batch) == 0:
|
|
@@ -73,17 +105,21 @@ def _infer_batch(batch, schema, return_mode, cols=None, **kwargs):
|
|
|
73
105
|
|
|
74
106
|
# Handling column for inference.
|
|
75
107
|
if c in schema.r2p:
|
|
76
|
-
content_it =
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
108
|
+
content_it = _iter_chunks(p_column=schema.r2p[c], batch_content_it=iter(batch), **kwargs)
|
|
109
|
+
# Register values.
|
|
110
|
+
for item in batch:
|
|
111
|
+
item[c] = []
|
|
112
|
+
for ind_in_batch, chunk in content_it:
|
|
113
|
+
# Append batch.
|
|
114
|
+
batch[ind_in_batch][c].append(chunk)
|
|
115
|
+
# Returning (optional).
|
|
116
|
+
if return_mode == "chunk":
|
|
117
|
+
global_ind = batch_ind * len(batch) + ind_in_batch
|
|
118
|
+
yield [global_ind, c, chunk]
|
|
119
|
+
|
|
120
|
+
# Convert content to string.
|
|
121
|
+
for item in batch:
|
|
122
|
+
item[c] = "".join(item[c])
|
|
87
123
|
|
|
88
124
|
if return_mode == "record":
|
|
89
125
|
for record in batch:
|
|
@@ -93,14 +129,21 @@ def _infer_batch(batch, schema, return_mode, cols=None, **kwargs):
|
|
|
93
129
|
yield batch
|
|
94
130
|
|
|
95
131
|
|
|
96
|
-
def iter_content(input_dicts_it, llm, schema, batch_size=1, limit_prompt=None,
|
|
132
|
+
def iter_content(input_dicts_it, llm, schema, batch_size=1, limit_prompt=None,
|
|
133
|
+
infer_mode="batch", return_mode="batch", attempts=1, event_loop=None,
|
|
134
|
+
**kwargs):
|
|
97
135
|
""" This method represent Python API aimed at application of `llm` towards
|
|
98
136
|
iterator of input_dicts via cache_target that refers to the SQLite using
|
|
99
137
|
the given `schema`
|
|
100
138
|
"""
|
|
101
|
-
assert (
|
|
139
|
+
assert (infer_mode in INFER_MODES.keys())
|
|
140
|
+
assert (return_mode in ["batch", "chunk", "record"])
|
|
102
141
|
assert (isinstance(llm, BaseLM))
|
|
103
142
|
|
|
143
|
+
# Setup event loop.
|
|
144
|
+
event_loop = asyncio.get_event_loop_policy().get_event_loop() \
|
|
145
|
+
if event_loop is None else event_loop
|
|
146
|
+
|
|
104
147
|
# Quick initialization of the schema.
|
|
105
148
|
if isinstance(schema, str):
|
|
106
149
|
schema = JsonService.read(schema)
|
|
@@ -112,32 +155,32 @@ def iter_content(input_dicts_it, llm, schema, batch_size=1, limit_prompt=None, r
|
|
|
112
155
|
input_dicts_it
|
|
113
156
|
)
|
|
114
157
|
|
|
158
|
+
handle_batch_func = lambda batch, **handle_kwargs: INFER_MODES[infer_mode](
|
|
159
|
+
llm,
|
|
160
|
+
DataService.limit_prompts(batch, limit=limit_prompt),
|
|
161
|
+
**handle_kwargs
|
|
162
|
+
)
|
|
163
|
+
|
|
164
|
+
# Optional wrapping into attempts.
|
|
165
|
+
if attempts > 1:
|
|
166
|
+
# Optional setup of the logger.
|
|
167
|
+
logger = logging.getLogger(__name__)
|
|
168
|
+
logging.basicConfig(level=logging.INFO)
|
|
169
|
+
|
|
170
|
+
attempt_dec = attempt_wrapper(attempts=attempts,
|
|
171
|
+
delay_sec=kwargs.get("attempt_delay_sec", 1),
|
|
172
|
+
logger=logger)
|
|
173
|
+
handle_batch_func = attempt_dec(handle_batch_func)
|
|
174
|
+
|
|
115
175
|
content_it = (_infer_batch(batch=batch,
|
|
116
|
-
|
|
176
|
+
batch_ind=batch_ind,
|
|
177
|
+
infer_mode=infer_mode,
|
|
178
|
+
handle_batch_func=handle_batch_func,
|
|
179
|
+
handle_missed_value_func=lambda *_: None,
|
|
117
180
|
return_mode=return_mode,
|
|
118
181
|
schema=schema,
|
|
182
|
+
event_loop=event_loop,
|
|
119
183
|
**kwargs)
|
|
120
|
-
for batch in BatchIterator(prompts_it, batch_size=batch_size))
|
|
184
|
+
for batch_ind, batch in enumerate(BatchIterator(prompts_it, batch_size=batch_size)))
|
|
121
185
|
|
|
122
186
|
yield from chain.from_iterable(content_it)
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def init_llm(adapter, **model_kwargs):
|
|
126
|
-
""" This method perform dynamic initialization of LLM from third-party resource.
|
|
127
|
-
"""
|
|
128
|
-
assert (isinstance(adapter, str))
|
|
129
|
-
|
|
130
|
-
# List of the Supported models and their API wrappers.
|
|
131
|
-
models_preset = {
|
|
132
|
-
"dynamic": lambda: dynamic_init(class_dir=CWD, class_filepath=llm_model_name,
|
|
133
|
-
class_name=llm_model_params)(**model_kwargs)
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
# Initialize LLM model.
|
|
137
|
-
params = adapter.split(':')
|
|
138
|
-
llm_model_type = params[0]
|
|
139
|
-
llm_model_name = params[1] if len(params) > 1 else params[-1]
|
|
140
|
-
llm_model_params = ':'.join(params[2:]) if len(params) > 2 else None
|
|
141
|
-
llm = find_by_prefix(d=models_preset, key=llm_model_type)()
|
|
142
|
-
|
|
143
|
-
return llm, llm_model_name
|
bulk_chain/core/llm_base.py
CHANGED
|
@@ -1,50 +1,24 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import time
|
|
3
|
-
|
|
4
|
-
|
|
5
1
|
class BaseLM(object):
    """ Base class of a language model adapter.

    Every `ask*` method is a stub that raises `NotImplementedError`;
    a subclass overrides the ones that correspond to the inference modes
    it supports.
    """

    def __init__(self, **kwargs):
        # Keyword arguments are accepted and ignored by the base class.
        pass

    def ask(self, content):
        """ Assumes to return str.
        """
        # `NotImplemented` is a non-callable constant meant for binary operators;
        # `NotImplementedError` is the exception for a non-overridden method.
        raise NotImplementedError()

    def ask_stream(self, content):
        """ Assumes to return generator.
        """
        raise NotImplementedError()

    async def ask_async(self, prompt):
        """ Assumes to return co-routine.
        """
        raise NotImplementedError()

    async def ask_stream_async(self, batch):
        """ Assumes to return AsyncGenerator.
        """
        raise NotImplementedError()
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from typing import AsyncGenerator, Any
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class AsyncioService:
    """ Helpers for running coroutines and async generators from synchronous code. """

    @staticmethod
    async def _run_tasks_async(batch, async_handler):
        """ Call `async_handler` for every prompt of `batch` and await all of
        the results together; the results keep the order of `batch`.
        """
        tasks = [async_handler(prompt) for prompt in batch]
        return await asyncio.gather(*tasks)

    @staticmethod
    async def _run_generator(gen, output_queue, idx):
        """ Pump every item of `gen` into `output_queue` as `(idx, item)`.

        `(idx, StopAsyncIteration)` is always queued last -- on exhaustion,
        on failure and on cancellation -- so the consumer is never left waiting.
        """
        try:
            async for item in gen:
                await output_queue.put((idx, item))
        finally:
            await output_queue.put((idx, StopAsyncIteration))

    @staticmethod
    def run_tasks(event_loop, **tasks_kwargs):
        """ Synchronous entry point: run `_run_tasks_async(**tasks_kwargs)`
        on `event_loop` until completion and return the list of results.
        """
        return event_loop.run_until_complete(AsyncioService._run_tasks_async(**tasks_kwargs))

    @staticmethod
    async def merge_generators(*gens: AsyncGenerator[Any, None]) -> AsyncGenerator[Any, None]:
        """ Merge several async generators into one stream.

        Every generator is consumed by its own task; items are yielded in the
        order in which they become available. An exception raised by any of
        the source generators is re-raised here instead of being silently
        dropped. Producer tasks are always cancelled and awaited on exit,
        including the case when the consumer stops before the end.
        """
        output_queue = asyncio.Queue()
        tasks = [
            asyncio.create_task(AsyncioService._run_generator(gen, output_queue, idx))
            for idx, gen in enumerate(gens)
        ]

        finished = set()
        try:
            while len(finished) < len(tasks):
                idx, item = await output_queue.get()
                if item is StopAsyncIteration:
                    finished.add(idx)
                    # Surface the failure of the producer (if any).
                    await tasks[idx]
                else:
                    yield item
        finally:
            for task in tasks:
                task.cancel()
            # Collect the outcome of every task so nothing stays pending
            # and no "exception was never retrieved" warning is produced.
            await asyncio.gather(*tasks, return_exceptions=True)

    @staticmethod
    def async_gen_to_iter(gen, loop=None):
        """ This approach is limited. Could be considered as legacy.
        https://stackoverflow.com/questions/71580727/translating-async-generator-into-sync-one/78573267#78573267

        Drives `gen` item by item with `loop.run_until_complete`. When `loop`
        is None a new event loop is created and closed at the end. The loop
        in use is installed as the current event loop.
        """

        loop_created = False
        if loop is None:
            loop_created = True
            loop = asyncio.new_event_loop()

        asyncio.set_event_loop(loop)
        try:
            while True:
                try:
                    yield loop.run_until_complete(gen.__anext__())
                except StopAsyncIteration:
                    break
        finally:
            # Give the async generator a chance to run its own cleanup when the
            # consumer stops early; this is a no-op for a finished generator.
            aclose = getattr(gen, "aclose", None)
            if aclose is not None and not loop.is_closed() and not loop.is_running():
                loop.run_until_complete(aclose())
            if loop_created:
                loop.close()
|
bulk_chain/core/service_batch.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
class BatchIterator:
|
|
2
2
|
|
|
3
3
|
def __init__(self, data_iter, batch_size, end_value=None, filter_func=None):
|
|
4
|
-
assert(isinstance(batch_size, int) and batch_size > 0)
|
|
5
|
-
assert(callable(end_value) or end_value is None)
|
|
4
|
+
assert (isinstance(batch_size, int) and batch_size > 0)
|
|
5
|
+
assert (callable(end_value) or end_value is None)
|
|
6
6
|
self.__data_iter = data_iter
|
|
7
7
|
self.__index = 0
|
|
8
8
|
self.__batch_size = batch_size
|
bulk_chain/core/utils.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import importlib
|
|
2
2
|
import logging
|
|
3
3
|
import sys
|
|
4
|
+
import time
|
|
4
5
|
from collections import Counter
|
|
5
6
|
from os.path import dirname, join, basename
|
|
6
7
|
|
|
@@ -60,10 +61,10 @@ def auto_import(name, is_class=False):
|
|
|
60
61
|
return m() if is_class else m
|
|
61
62
|
|
|
62
63
|
|
|
63
|
-
def dynamic_init(
|
|
64
|
+
def dynamic_init(class_filepath, class_name=None):
|
|
64
65
|
|
|
65
66
|
# Registering path.
|
|
66
|
-
target = join(
|
|
67
|
+
target = join(dirname(class_filepath))
|
|
67
68
|
logger.info(f"Adding sys path for `{target}`")
|
|
68
69
|
sys.path.insert(1, target)
|
|
69
70
|
class_path_list = class_filepath.split('/')
|
|
@@ -89,3 +90,21 @@ def optional_limit_iter(it_data, limit=None):
|
|
|
89
90
|
if limit is not None and counter["returned"] > limit:
|
|
90
91
|
break
|
|
91
92
|
yield data
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def attempt_wrapper(attempts, delay_sec=1, logger=None):
    """ Build a decorator that retries the decorated callable.

    :param attempts: maximal amount of calls of the decorated callable.
    :param delay_sec: pause (seconds) between two consecutive attempts;
        None disables the pause. There is no pause after the last attempt.
    :param logger: optional logger; every failed attempt and its exception
        are reported through `logger.info`.
    :return: decorator; the wrapped callable returns the result of the first
        successful call.
    :raises Exception: "Failed after N attempts" once every attempt has failed;
        the exception of the last attempt is attached as `__cause__`.
    """
    import functools

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            last_error = None
            for attempt in range(1, attempts + 1):
                try:
                    # Do action.
                    return func(*args, **kwargs)
                except Exception as e:
                    last_error = e
                    if logger is not None:
                        logger.info(f"Unable to infer the result. Try {attempt} out of {attempts}.")
                        logger.info(e)
                    # Waiting makes sense only when another attempt follows.
                    if delay_sec is not None and attempt < attempts:
                        time.sleep(delay_sec)
            raise Exception(f"Failed after {attempts} attempts") from last_error
        return wrapper
    return decorator
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: bulk_chain
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.1.0
|
|
4
4
|
Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
|
|
5
5
|
Home-page: https://github.com/nicolay-r/bulk-chain
|
|
6
6
|
Author: Nicolay Rusnachenko
|
|
@@ -15,9 +15,8 @@ Classifier: Topic :: Text Processing :: Linguistic
|
|
|
15
15
|
Requires-Python: >=3.6
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: tqdm
|
|
19
18
|
|
|
20
|
-
# bulk-chain 1.
|
|
19
|
+
# bulk-chain 1.1.0
|
|
21
20
|

|
|
22
21
|
[](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
|
|
23
22
|
[](https://x.com/nicolayr_/status/1847969224636961033)
|
|
@@ -78,16 +77,36 @@ Below, is an example on how to declare your own schema:
|
|
|
78
77
|
|
|
79
78
|
# Usage
|
|
80
79
|
|
|
81
|
-
|
|
80
|
+
## 🤖 Prepare
|
|
82
81
|
|
|
83
|
-
1.
|
|
84
|
-
|
|
82
|
+
1. [schema](#chain-of-thought-schema)
|
|
83
|
+
* [Example for Sentiment Analysis](test/schema/thor_cot_schema.json)
|
|
84
|
+
2. **LLM model** from the [<b>Third-party providers hosting</b>↗️](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm).
|
|
85
|
+
3. Data (iter of dictionaries)
|
|
85
86
|
|
|
87
|
+
## 🚀 Launch
|
|
86
88
|
|
|
89
|
+
> **API**: For more details see the [**related Wiki page**](https://github.com/nicolay-r/bulk-chain/wiki)
|
|
87
90
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
+
```python
|
|
92
|
+
from bulk_chain.core.utils import dynamic_init
|
|
93
|
+
from bulk_chain.api import iter_content
|
|
94
|
+
|
|
95
|
+
content_it = iter_content(
|
|
96
|
+
# 1. Your schema.
|
|
97
|
+
schema="YOUR_SCHEMA.json",
|
|
98
|
+
# 2. Your third-party model implementation.
|
|
99
|
+
llm=dynamic_init(class_filepath="replicate_104.py", class_name="Replicate")(api_token="<API-KEY>"),
|
|
100
|
+
# 3. Customize your inference and result providing modes:
|
|
101
|
+
infer_mode="batch_async",
|
|
102
|
+
return_mode="batch",
|
|
103
|
+
# 4. Your iterator of dictionaries
|
|
104
|
+
input_dicts_it=YOUR_DATA_IT,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
for content in content_it:
|
|
108
|
+
# Handle your LLM responses here ...
|
|
109
|
+
```
|
|
91
110
|
|
|
92
111
|
|
|
93
112
|
# Embed your LLM
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
bulk_chain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
bulk_chain/api.py,sha256=gPGjaHYIn2Ewn6yXIXER-CM5SgXQ3ZJH-SdRyaPDOo0,6890
|
|
3
|
+
bulk_chain/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
+
bulk_chain/core/llm_base.py,sha256=aa73TGW03yLXMHY4b_1NgquRvP0CzH8IWZkcFPABFUg,557
|
|
5
|
+
bulk_chain/core/service_asyncio.py,sha256=S-D4K3LBa3noKTm0tXazluYVI8cBgN1IB6v6MFoMyNQ,1972
|
|
6
|
+
bulk_chain/core/service_batch.py,sha256=lWmjO0aU6h2rmfx_kGmNqt0Rdeaf2a4Dn5VyfKFkfDs,1033
|
|
7
|
+
bulk_chain/core/service_data.py,sha256=OWWHHnr_plwxYTxLuvMrhEc1PbSx-XC3rbFzV0hy3vk,1107
|
|
8
|
+
bulk_chain/core/service_dict.py,sha256=lAghLU-3V3xYGv5BTA327Qcw8UJYmgQRMFdggzlrUgo,383
|
|
9
|
+
bulk_chain/core/service_json.py,sha256=6o1xM_8c9QEjH9Q3qEmJylU9nahfRXhUd5sFF2dGJwo,182
|
|
10
|
+
bulk_chain/core/service_schema.py,sha256=KIP4n0Tz2h1i7SIMGhgAhoiCgUFXOT1rzMt38yACS2U,1154
|
|
11
|
+
bulk_chain/core/utils.py,sha256=tp1FJQBmJt-3QmG7B0hyJNTFyg_8BwTTdl8xTxSgNDk,3140
|
|
12
|
+
bulk_chain-1.1.0.dist-info/LICENSE,sha256=VF9SjNpwwSSFEY_eP_8A1ocDCrbwfjI1pZexXdCkOwo,1076
|
|
13
|
+
bulk_chain-1.1.0.dist-info/METADATA,sha256=EheCGDisKF0TwmzJfnDxW-rgsDVPNpCYGOvuaDn91tw,4428
|
|
14
|
+
bulk_chain-1.1.0.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
15
|
+
bulk_chain-1.1.0.dist-info/top_level.txt,sha256=Hxq_wyH-GDXKBaA63UfBIiMJO2eCHJG5EOrXDphpeB4,11
|
|
16
|
+
bulk_chain-1.1.0.dist-info/RECORD,,
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
bulk_chain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
bulk_chain/api.py,sha256=d_c10Je8wUSnCdQjyWCHVx4FGW6M2_pBMMqKsI_YJaY,5119
|
|
3
|
-
bulk_chain/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
|
-
bulk_chain/core/llm_base.py,sha256=DZ9l4HpCs9uKTZp68miw_XCqmRAJBqQPuYSK889CeUk,1785
|
|
5
|
-
bulk_chain/core/service_batch.py,sha256=LMxrZeQXV_AJAoCaMCHVx8TvjcmCaKUQhNE8K4D8pCo,1031
|
|
6
|
-
bulk_chain/core/service_data.py,sha256=OWWHHnr_plwxYTxLuvMrhEc1PbSx-XC3rbFzV0hy3vk,1107
|
|
7
|
-
bulk_chain/core/service_dict.py,sha256=lAghLU-3V3xYGv5BTA327Qcw8UJYmgQRMFdggzlrUgo,383
|
|
8
|
-
bulk_chain/core/service_json.py,sha256=6o1xM_8c9QEjH9Q3qEmJylU9nahfRXhUd5sFF2dGJwo,182
|
|
9
|
-
bulk_chain/core/service_schema.py,sha256=KIP4n0Tz2h1i7SIMGhgAhoiCgUFXOT1rzMt38yACS2U,1154
|
|
10
|
-
bulk_chain/core/utils.py,sha256=Dx9Gy-jPpk-w_8WUekN0Ij4RBIWVAPg74vA3N0JgGqc,2471
|
|
11
|
-
bulk_chain-1.0.0.dist-info/LICENSE,sha256=VF9SjNpwwSSFEY_eP_8A1ocDCrbwfjI1pZexXdCkOwo,1076
|
|
12
|
-
bulk_chain-1.0.0.dist-info/METADATA,sha256=TR86CmhcHJ3Sep8TlHZ0Ede_PnH8G5iMILUvVvSskJY,3810
|
|
13
|
-
bulk_chain-1.0.0.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
|
|
14
|
-
bulk_chain-1.0.0.dist-info/top_level.txt,sha256=Hxq_wyH-GDXKBaA63UfBIiMJO2eCHJG5EOrXDphpeB4,11
|
|
15
|
-
bulk_chain-1.0.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|