bulk-chain 0.24.1__tar.gz → 0.25.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/PKG-INFO +37 -25
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/README.md +36 -23
- bulk_chain-0.25.0/bulk_chain/api.py +79 -0
- bulk_chain-0.25.0/bulk_chain/core/llm_base.py +52 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain/core/service_args.py +25 -6
- bulk_chain-0.25.0/bulk_chain/core/service_batch.py +51 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain/core/service_data.py +4 -0
- bulk_chain-0.25.0/bulk_chain/core/service_dict.py +10 -0
- bulk_chain-0.25.0/bulk_chain/core/service_json.py +10 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain/core/service_llm.py +9 -9
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain/core/service_schema.py +1 -2
- bulk_chain-0.25.0/bulk_chain/infer.py +191 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain.egg-info/PKG-INFO +37 -25
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain.egg-info/SOURCES.txt +4 -3
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/setup.py +1 -1
- bulk_chain-0.25.0/test/test_api.py +42 -0
- bulk_chain-0.25.0/test/test_cmdargs.py +20 -0
- bulk_chain-0.24.1/bulk_chain/core/llm_base.py +0 -13
- bulk_chain-0.24.1/bulk_chain/core/provider_sqlite.py +0 -78
- bulk_chain-0.24.1/bulk_chain/core/service_csv.py +0 -57
- bulk_chain-0.24.1/bulk_chain/core/service_json.py +0 -26
- bulk_chain-0.24.1/bulk_chain/infer.py +0 -173
- bulk_chain-0.24.1/bulk_chain.egg-info/requires.txt +0 -1
- bulk_chain-0.24.1/test/test_cmdargs.py +0 -9
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/LICENSE +0 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain/__init__.py +0 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain/core/__init__.py +0 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain/core/utils.py +0 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain.egg-info/dependency_links.txt +0 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/bulk_chain.egg-info/top_level.txt +0 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/setup.cfg +0 -0
- {bulk_chain-0.24.1 → bulk_chain-0.25.0}/test/test_args_seeking.py +0 -0
--- bulk_chain-0.24.1/PKG-INFO
+++ bulk_chain-0.25.0/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bulk_chain
-Version: 0.24.1
+Version: 0.25.0
 Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
 Home-page: https://github.com/nicolay-r/bulk-chain
 Author: Nicolay Rusnachenko
@@ -15,32 +15,42 @@ Classifier: Topic :: Text Processing :: Linguistic
 Requires-Python: >=3.6
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: tqdm
 
-# bulk-chain 0.24.1
+# bulk-chain 0.25.0
 
 [](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
 [](https://x.com/nicolayr_/status/1847969224636961033)
+[](https://pypistats.org/packages/bulk-chain)
 
 <p align="center">
     <img src="logo.png"/>
 </p>
 
-A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903)
-It allows applying series of prompts formed into `schema` (See [related section](#chain-of-thought-schema))
+A lightweight, no-strings-attached **framework** for your LLM that allows applying [Chain-of-Thought](https://arxiv.org/abs/2201.11903) prompt `schema` (See [related section](#chain-of-thought-schema)) towards a massive textual collections.
 
-### Features
+### Main Features
 * ✅ **No-strings**: you're free to LLM dependencies and flexible `venv` customization.
-* ✅ **Provides iterator over infinite amount of input contexts** served in `CSV`/`JSONL`.
-* ✅ **Progress caching**: withstanding exception during LLM calls by using `sqlite3` engine for caching LLM answers;
 * ✅ **Support schemas descriptions** for Chain-of-Thought concept.
+* ✅ **Provides iterator over infinite amount of input contexts** served in `CSV`/`JSONL`.
+
+### Extra Features
+* ✅ **Progress caching [for remote LLMs]**: withstanding exception during LLM calls by using `sqlite3` engine for caching LLM answers;
+
 
 # Installation
 
+From PyPI:
+
 ```bash
 pip install bulk-chain
 ```
 
+or latest version from here:
+
+```bash
+pip install git+https://github.com/nicolay-r/bulk-chain@master
+```
+
 ## Chain-of-Thought Schema
 
 To declare Chain-of-Though (CoT) schema, this project exploits `JSON` format.
@@ -63,35 +73,37 @@ Below, is an example on how to declare your own schema:
 }
 ```
 
-Another templates are available [here](/ext/schema/
+Another templates are available [here](/ext/schema/).
 
 # Usage
 
-
+Preliminary steps:
 
-1. Define your [
-
-
-
-
-
-
-
+1. Define your [schema](#chain-of-thought-schema) ([Example for Sentiment Analysis](/ext/schema/thor_cot_schema.json)))
+2. Wrap or pick **LLM model** from the [list of presets](/ext/).
+
+## API
+
+Please take a look at the [**related Wiki page**](https://github.com/nicolay-r/bulk-chain/wiki)
+
+## Shell
+
+> **NOTE:** You have to install `source-iter` package
 
-3. Launch inference in (chat mode):
 ```bash
-
---
---
-
-
+python3 -m bulk_chain.infer \
+    --src "<PATH-TO-YOUR-CSV-or-JSONL>" \
+    --schema "ext/schema/default.json" \
+    --adapter "dynamic:ext/replicate.py:Replicate" \
+    %%m \
+    --api_token "<REPLICATE-API-TOKEN>" \
     --temp 0.1
 ```
 
 # Embed your LLM
 
 All you have to do is to implement `BaseLM` class, that includes:
-* `__init__` -- for
+* `__init__` -- for setting up *batching mode support* and (optional) *model name*;
 * `ask(prompt)` -- infer your model with the given `prompt`.
 
 See examples with models [here](/ext).
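The schema example itself sits outside this hunk's context, but its shape can be inferred from `SchemaService.from_prompt` in the `service_schema.py` hunk below, which builds `{"schema": [{"prompt": ..., "out": ..., "in": ...}]}`. A minimal two-step sketch (the field names and the `{field}` placeholder syntax are illustrative assumptions, not taken from this diff):

```python
# Hypothetical CoT schema sketch: each step is a prompt template, and a
# later step may reference the output field ("out") of an earlier one.
schema = {
    "schema": [
        {"prompt": "Summarize the following text: {text}", "out": "summary"},
        {"prompt": "What is the sentiment of: {summary}?", "out": "sentiment"}
    ]
}
```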
--- bulk_chain-0.24.1/README.md
+++ bulk_chain-0.25.0/README.md
Hunks @@ -1,27 +1,38 @@ and @@ -44,35 +55,37 @@: identical to the PKG-INFO body changes above, as PKG-INFO embeds README.md verbatim after its metadata header.
--- /dev/null
+++ bulk_chain-0.25.0/bulk_chain/api.py
@@ -0,0 +1,79 @@
+import os
+from itertools import chain
+
+from bulk_chain.core.llm_base import BaseLM
+from bulk_chain.core.service_batch import BatchIterator, BatchService
+from bulk_chain.core.service_data import DataService
+from bulk_chain.core.service_dict import DictionaryService
+from bulk_chain.core.service_json import JsonService
+from bulk_chain.core.service_schema import SchemaService
+
+
+INFER_MODES = {
+    "default": lambda llm, prompt, limit_prompt=None: llm.ask_core(
+        prompt[:limit_prompt] if limit_prompt is not None else prompt),
+    "batch": lambda llm, batch, limit_prompt=None: llm.ask_core(
+        DataService.limit_prompts(batch, limit=limit_prompt))
+}
+
+
+CWD = os.getcwd()
+
+
+def _update_batch_content(c, batch, schema, infer_func):
+    assert (isinstance(batch, list))
+    assert (isinstance(c, str))
+
+    if c in schema.p2r:
+        for batch_item in batch:
+            batch_item[c] = DataService.get_prompt_text(prompt=batch_item[c]["prompt"], data_dict=batch_item)
+    if c in schema.r2p:
+        p_column = schema.r2p[c]
+        # This instruction takes a lot of time in a non-batching mode.
+        BatchService.handle_param_as_batch(batch=batch,
+                                           src_param=p_column,
+                                           tgt_param=c,
+                                           handle_func=lambda b: infer_func(b))
+
+
+def _infer_batch(batch, schema, infer_func, cols=None):
+    assert (isinstance(batch, list))
+    assert (callable(infer_func))
+
+    if len(batch) == 0:
+        return batch
+
+    if cols is None:
+        first_item = batch[0]
+        cols = first_item.keys() if cols is None else cols
+
+    for c in cols:
+        _update_batch_content(c=c, batch=batch, schema=schema, infer_func=infer_func)
+
+    return batch
+
+
+def iter_content(input_dicts_it, llm, schema, batch_size=1, return_batch=True, limit_prompt=None):
+    """ This method represent Python API aimed at application of `llm` towards
+        iterator of input_dicts via cache_target that refers to the SQLite using
+        the given `schema`
+    """
+    assert (isinstance(llm, BaseLM))
+
+    # Quick initialization of the schema.
+    if isinstance(schema, str):
+        schema = JsonService.read(schema)
+    if isinstance(schema, dict):
+        schema = SchemaService(json_data=schema)
+
+    prompts_it = map(
+        lambda data: DictionaryService.custom_update(src_dict=data, other_dict=schema.cot_args),
+        input_dicts_it
+    )
+
+    content_it = (_infer_batch(batch=batch,
+                               infer_func=lambda batch: INFER_MODES["batch"](llm, batch, limit_prompt),
+                               schema=schema)
+                  for batch in BatchIterator(prompts_it, batch_size=batch_size))
+
+    yield from content_it if return_batch else chain.from_iterable(content_it)
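A rough usage sketch of the new `iter_content` API. `EchoLM` is a hypothetical `BaseLM` subclass (sketched after the `llm_base.py` hunk below); since it does not support batching, `batch_size` stays at 1 — `ask_core` asserts that non-batching models receive singleton batches.

```python
from bulk_chain.api import iter_content

input_dicts = iter([{"text": "The service was great."},
                    {"text": "The food was cold."}])

# Yields one processed batch (a list of dicts) at a time; each dict gets
# extended with the schema's prompt and output fields.
for batch in iter_content(input_dicts_it=input_dicts,
                          llm=EchoLM(),  # hypothetical adapter, see below
                          schema="ext/schema/default.json",
                          batch_size=1,
                          return_batch=True):
    for record in batch:
        print(record)
```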
--- /dev/null
+++ bulk_chain-0.25.0/bulk_chain/core/llm_base.py
@@ -0,0 +1,52 @@
+import logging
+import time
+
+from bulk_chain.core.utils import format_model_name
+
+
+class BaseLM(object):
+
+    def __init__(self, name=None, attempts=None, delay_sec=1, enable_log=True,
+                 support_batching=False, **kwargs):
+
+        self.__name = name
+        self.__attempts = 1 if attempts is None else attempts
+        self.__delay_sec = delay_sec
+        self.__support_batching = support_batching
+
+        if enable_log:
+            self.__logger = logging.getLogger(__name__)
+            logging.basicConfig(level=logging.INFO)
+
+    def ask_core(self, batch):
+
+        for i in range(self.__attempts):
+            try:
+                if self.__support_batching:
+                    # Launch in batch mode.
+                    content = self.ask(batch)
+                else:
+                    # Launch in non-batch mode.
+                    assert len(batch) == 1, "The LM does not support batching," \
+                        f" while size of the content is {len(batch)} which is not equal 1. " \
+                        f"Please enable batch-supporting or set required inference settings."
+                    content = batch[0]
+
+                response = self.ask(content)
+
+                # Wrapping into batch the response in the case of non-batching mode.
+                return response if self.__support_batching else [response]
+
+            except Exception as e:
+                if self.__logger is not None:
+                    self.__logger.info("Unable to infer the result. Try {} out of {}.".format(i, self.__attempts))
+                    self.__logger.info(e)
+                time.sleep(self.__delay_sec)
+
+        raise Exception("Can't infer")
+
+    def ask(self, content):
+        raise NotImplemented()
+
+    def name(self):
+        return format_model_name(self.__name)
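A minimal adapter sketch against the contract above, assuming a non-batching model: with `support_batching=False`, `ask` receives a single prompt string and returns a single response string, which `ask_core` wraps back into a one-element batch.

```python
from bulk_chain.core.llm_base import BaseLM


class EchoLM(BaseLM):
    """Hypothetical adapter for illustration only."""

    def __init__(self, **kwargs):
        # Non-batching: ask() is fed one prompt at a time.
        super(EchoLM, self).__init__(name="echo-lm", support_batching=False, **kwargs)

    def ask(self, content):
        # A real adapter would call an LLM backend here.
        return "echo: " + content
```

A batching adapter would instead pass `support_batching=True` and accept and return lists in `ask`.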
--- bulk_chain-0.24.1/bulk_chain/core/service_args.py
+++ bulk_chain-0.25.0/bulk_chain/core/service_args.py
@@ -33,14 +33,33 @@ class CmdArgsService:
         yield __release()
 
     @staticmethod
-    def 
+    def __find_suffix_ind(lst, idx_from, end_prefix):
+        for i in range(idx_from, len(lst)):
+            if lst[i].startswith(end_prefix):
+                return i
+        return len(lst)
+
+    @staticmethod
+    def extract_native_args(lst, end_prefix):
+        return lst[:CmdArgsService.__find_suffix_ind(lst, idx_from=0, end_prefix=end_prefix)]
+
+    @staticmethod
+    def find_grouped_args(lst, starts_with, end_prefix):
         """Slices a list in two, cutting on index matching "sep"
         """
-
-
-
-
-        return 
+
+        # Checking the presence of starts_with.
+        # We have to return empty content in the case of absence starts_with in the lst.
+        if starts_with not in lst:
+            return []
+
+        # Assigning start index.
+        idx_from = lst.index(starts_with) + 1
+
+        # Assigning end index.
+        idx_to = CmdArgsService.__find_suffix_ind(lst, idx_from=idx_from, end_prefix=end_prefix)
+
+        return lst[idx_from:idx_to]
 
     @staticmethod
     def args_to_dict(args):
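These helpers appear to back the `%%m`-style argument grouping in the CLI example above. A behavioral sketch (the `%%` prefix is inferred from the README snippet, not confirmed by this hunk):

```python
argv = ["--src", "data.jsonl", "--schema", "s.json",
        "%%m", "--api_token", "XYZ", "--temp", "0.1"]

# Arguments before the first token starting with "%%".
native = CmdArgsService.extract_native_args(argv, end_prefix="%%")
# -> ["--src", "data.jsonl", "--schema", "s.json"]

# Arguments after the "%%m" marker, up to the next "%%"-prefixed token.
model_args = CmdArgsService.find_grouped_args(argv, starts_with="%%m", end_prefix="%%")
# -> ["--api_token", "XYZ", "--temp", "0.1"]
```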
--- /dev/null
+++ bulk_chain-0.25.0/bulk_chain/core/service_batch.py
@@ -0,0 +1,51 @@
+class BatchService(object):
+
+    @staticmethod
+    def handle_param_as_batch(batch, src_param, tgt_param, handle_func):
+        assert (isinstance(batch, list))
+        assert (isinstance(src_param, str))
+        assert (callable(handle_func))
+
+        _batch = [item[src_param] for item in batch]
+
+        # Do handling for the batch.
+        _handled_batch = handle_func(_batch)
+        assert (isinstance(_handled_batch, list))
+
+        # Apply changes.
+        for i, item in enumerate(batch):
+            item[tgt_param] = _handled_batch[i]
+
+
+class BatchIterator:
+
+    def __init__(self, data_iter, batch_size, end_value=None):
+        assert(isinstance(batch_size, int) and batch_size > 0)
+        assert(callable(end_value) or end_value is None)
+        self.__data_iter = data_iter
+        self.__index = 0
+        self.__batch_size = batch_size
+        self.__end_value = end_value
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        buffer = []
+        while True:
+            try:
+                data = next(self.__data_iter)
+            except StopIteration:
+                break
+            buffer.append(data)
+            if len(buffer) == self.__batch_size:
+                break
+
+        if len(buffer) > 0:
+            self.__index += 1
+            return buffer
+
+        if self.__end_value is None:
+            raise StopIteration
+        else:
+            return self.__end_value()
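For reference, `BatchIterator` groups any iterator into fixed-size chunks and keeps the trailing partial batch; a quick sketch:

```python
batches = list(BatchIterator(iter(range(7)), batch_size=3))
# -> [[0, 1, 2], [3, 4, 5], [6]]
```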
--- bulk_chain-0.24.1/bulk_chain/core/service_data.py
+++ bulk_chain-0.25.0/bulk_chain/core/service_data.py
@@ -20,3 +20,7 @@ class DataService(object):
         field_names = list(parse_fields_func(prompt))
         return DataService.compose_prompt_text(
             prompt=prompt, data_dict=data_dict, field_names=field_names)
+
+    @staticmethod
+    def limit_prompts(prompts_list, limit=None):
+        return [p[:limit] if limit is not None else p for p in prompts_list]
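The new `limit_prompts` helper truncates each prompt to at most `limit` characters (shorter prompts pass through unchanged), which is how `api.INFER_MODES["batch"]` applies `limit_prompt` to a whole batch:

```python
DataService.limit_prompts(["abcdef", "xy"], limit=3)  # -> ["abc", "xy"]
DataService.limit_prompts(["abcdef", "xy"])           # -> ["abcdef", "xy"]
```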
--- /dev/null
+++ bulk_chain-0.25.0/bulk_chain/core/service_dict.py
@@ -0,0 +1,10 @@
+class DictionaryService:
+
+    @staticmethod
+    def custom_update(src_dict, other_dict):
+        for k, v in other_dict.items():
+            if k in src_dict:
+                raise Exception(f"The key `{k}` is already defined in both dicts with values: "
+                                f"`{src_dict[k]}` (src) and `{v}` (other)")
+            src_dict[k] = v
+        return src_dict
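`custom_update` is a strict in-place dict merge that refuses key collisions instead of silently overwriting; `api.iter_content` uses it to inject `schema.cot_args` into every input record. For example:

```python
record = {"text": "The food was cold."}
DictionaryService.custom_update(src_dict=record, other_dict={"summary": None})
# record is now {"text": "The food was cold.", "summary": None}

DictionaryService.custom_update({"text": "a"}, {"text": "b"})
# raises Exception: the key `text` is defined in both dicts
```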
--- bulk_chain-0.24.1/bulk_chain/core/service_llm.py
+++ bulk_chain-0.25.0/bulk_chain/core/service_llm.py
@@ -4,9 +4,6 @@ from bulk_chain.core.llm_base import BaseLM
 from bulk_chain.core.service_data import DataService
 from bulk_chain.core.utils import iter_params
 
-logger = logging.getLogger(__name__)
-logging.basicConfig(level=logging.INFO)
-
 
 def pad_str(text, pad):
     return text.rjust(len(text) + pad, ' ')
@@ -27,9 +24,12 @@ def nice_output(text, width, pad=4, remove_new_line=False):
 
 
 def chat_with_lm(lm, chain=None, model_name=None):
-    assert(isinstance(lm, BaseLM))
-    assert(isinstance(chain, list))
-    assert(isinstance(model_name, str) or model_name is None)
+    assert (isinstance(lm, BaseLM))
+    assert (isinstance(chain, list))
+    assert (isinstance(model_name, str) or model_name is None)
+
+    logger = logging.getLogger(__name__)
+    logging.basicConfig(level=logging.INFO)
 
     do_exit = False
     model_name = model_name if model_name is not None else "agent"
@@ -74,9 +74,9 @@ def chat_with_lm(lm, chain=None, model_name=None):
         logger.info(nice_output(actual_prompt, pad=pad*2, remove_new_line=True, width=80))
 
         # Response.
-
+        response_batch = lm.ask_core(batch=[actual_prompt])
         logger.info(pad_str(f"{model_name} (resp)->", pad=pad))
-        logger.info(nice_output(
+        logger.info(nice_output(response_batch[0], pad=pad * 2, remove_new_line=False, width=80))
 
         # Collecting the answer for the next turn.
-        data_dict[prompt_args["out"]] = 
+        data_dict[prompt_args["out"]] = response_batch[0]
--- bulk_chain-0.24.1/bulk_chain/core/service_schema.py
+++ bulk_chain-0.25.0/bulk_chain/core/service_schema.py
@@ -2,12 +2,11 @@ class SchemaService(object):
 
     def __init__(self, json_data):
         self.src = json_data
-        self.name = self.src["name"]
         self.r2p, self.p2r, self.cot_args, self.chain = SchemaService.__init_schema(prompts=json_data["schema"])
 
     @classmethod
     def from_prompt(cls, prompt):
-        prompt_schema = {"
+        prompt_schema = {"schema": [{"prompt": prompt, "out": "response", "in": "prompt"}]}
         return cls(prompt_schema)
 
     @staticmethod
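With the mandatory `name` field dropped, `from_prompt` now builds a complete single-step schema on its own: the step reads the input record's `prompt` field and writes its answer to `response`. For example (the `{prompt}` placeholder syntax is an illustrative assumption):

```python
service = SchemaService.from_prompt("Translate to English: {prompt}")
# Equivalent to:
# SchemaService(json_data={"schema": [
#     {"prompt": "Translate to English: {prompt}", "out": "response", "in": "prompt"}]})
```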