bulk-chain 0.24.0__tar.gz → 0.24.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/PKG-INFO +8 -3
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/README.md +7 -2
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/provider_sqlite.py +6 -7
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/infer.py +12 -9
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/PKG-INFO +8 -3
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/setup.py +1 -1
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/LICENSE +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/__init__.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/__init__.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/llm_base.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_args.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_csv.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_data.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_json.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_llm.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_schema.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/utils.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/SOURCES.txt +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/dependency_links.txt +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/requires.txt +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/top_level.txt +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/setup.cfg +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/test/test_args_seeking.py +0 -0
- {bulk_chain-0.24.0 → bulk_chain-0.24.1}/test/test_cmdargs.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: bulk_chain
|
|
3
|
-
Version: 0.24.0
|
|
3
|
+
Version: 0.24.1
|
|
4
4
|
Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
|
|
5
5
|
Home-page: https://github.com/nicolay-r/bulk-chain
|
|
6
6
|
Author: Nicolay Rusnachenko
|
|
@@ -17,9 +17,14 @@ Description-Content-Type: text/markdown
|
|
|
17
17
|
License-File: LICENSE
|
|
18
18
|
Requires-Dist: tqdm
|
|
19
19
|
|
|
20
|
-
# bulk-chain
|
|
20
|
+
# bulk-chain 0.24.1
|
|
21
21
|

|
|
22
22
|
[](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
|
|
23
|
+
[](https://x.com/nicolayr_/status/1847969224636961033)
|
|
24
|
+
|
|
25
|
+
<p align="center">
|
|
26
|
+
<img src="logo.png"/>
|
|
27
|
+
</p>
|
|
23
28
|
|
|
24
29
|
A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
|
|
25
30
|
It allows applying series of prompts formed into `schema` (See [related section](#chain-of-thought-schema))
|
|
@@ -33,7 +38,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
|
|
|
33
38
|
# Installation
|
|
34
39
|
|
|
35
40
|
```bash
|
|
36
|
-
pip install
|
|
41
|
+
pip install bulk-chain
|
|
37
42
|
```
|
|
38
43
|
|
|
39
44
|
## Chain-of-Thought Schema
|
|
@@ -1,6 +1,11 @@
|
|
|
1
|
-
# bulk-chain
|
|
1
|
+
# bulk-chain 0.24.1
|
|
2
2
|

|
|
3
3
|
[](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
|
|
4
|
+
[](https://x.com/nicolayr_/status/1847969224636961033)
|
|
5
|
+
|
|
6
|
+
<p align="center">
|
|
7
|
+
<img src="logo.png"/>
|
|
8
|
+
</p>
|
|
4
9
|
|
|
5
10
|
A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
|
|
6
11
|
It allows applying series of prompts formed into `schema` (See [related section](#chain-of-thought-schema))
|
|
@@ -14,7 +19,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
|
|
|
14
19
|
# Installation
|
|
15
20
|
|
|
16
21
|
```bash
|
|
17
|
-
pip install
|
|
22
|
+
pip install bulk-chain
|
|
18
23
|
```
|
|
19
24
|
|
|
20
25
|
## Chain-of-Thought Schema
|
|
@@ -11,9 +11,9 @@ class SQLiteProvider(object):
|
|
|
11
11
|
sqlite3_column_types = [id_column_type] + sqlite3_column_types
|
|
12
12
|
|
|
13
13
|
# Compose the whole columns list.
|
|
14
|
-
content = ", ".join([" "
|
|
14
|
+
content = ", ".join([f"[{item[0]}] {item[1]}" for item in zip(columns, sqlite3_column_types)])
|
|
15
15
|
cur.execute(f"CREATE TABLE IF NOT EXISTS {table_name}({content})")
|
|
16
|
-
cur.execute(f"CREATE INDEX IF NOT EXISTS
|
|
16
|
+
cur.execute(f"CREATE INDEX IF NOT EXISTS [{id_column_name}] ON {table_name}([{id_column_name}])")
|
|
17
17
|
|
|
18
18
|
@staticmethod
|
|
19
19
|
def write_auto(data_it, target, data2col_func, table_name, id_column_name="id",
|
|
@@ -49,13 +49,13 @@ class SQLiteProvider(object):
|
|
|
49
49
|
[Exception(f"{column} is expected to be in row!") for column in row_columns if column not in columns]
|
|
50
50
|
|
|
51
51
|
uid = data[id_column_name]
|
|
52
|
-
r = cur.execute(f"SELECT EXISTS(SELECT 1 FROM {table_name} WHERE {id_column_name}='{uid}');")
|
|
52
|
+
r = cur.execute(f"SELECT EXISTS(SELECT 1 FROM {table_name} WHERE [{id_column_name}]='{uid}');")
|
|
53
53
|
ans = r.fetchone()[0]
|
|
54
54
|
if ans == 1:
|
|
55
55
|
continue
|
|
56
56
|
|
|
57
57
|
params = ", ".join(tuple(['?'] * (len(columns))))
|
|
58
|
-
row_columns_str = ", ".join(row_columns)
|
|
58
|
+
row_columns_str = ", ".join([f"[{col}]" for col in row_columns])
|
|
59
59
|
cur.execute(f"INSERT INTO {table_name}({row_columns_str}) VALUES ({params})",
|
|
60
60
|
[data2col_func(c, data) for c in row_columns])
|
|
61
61
|
con.commit()
|
|
@@ -63,11 +63,10 @@ class SQLiteProvider(object):
|
|
|
63
63
|
cur.close()
|
|
64
64
|
|
|
65
65
|
@staticmethod
|
|
66
|
-
def
|
|
66
|
+
def iter_rows(target, table="content"):
|
|
67
67
|
with sqlite3.connect(target) as conn:
|
|
68
68
|
cursor = conn.cursor()
|
|
69
|
-
|
|
70
|
-
cursor.execute(f"SELECT {cols} FROM {table}")
|
|
69
|
+
cursor.execute(f"SELECT * FROM {table}")
|
|
71
70
|
for row in cursor:
|
|
72
71
|
yield row
|
|
73
72
|
|
|
@@ -48,7 +48,7 @@ def init_schema(json_filepath):
|
|
|
48
48
|
return SchemaService(json_data=JsonService.read_data(json_filepath))
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
|
|
51
|
+
def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table, id_column_name):
|
|
52
52
|
""" This method represent Python API aimed at application of `llm` towards
|
|
53
53
|
iterator of input_dicts via cache_target that refers to the SQLite using
|
|
54
54
|
the given `schema`
|
|
@@ -79,7 +79,7 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
|
|
|
79
79
|
data_it=data_it, target=filepath,
|
|
80
80
|
data2col_func=optional_update_data_records,
|
|
81
81
|
table_name=handle_table_name(table_name if table_name is not None else "contents"),
|
|
82
|
-
id_column_name=
|
|
82
|
+
id_column_name=id_column_name)
|
|
83
83
|
}
|
|
84
84
|
|
|
85
85
|
# We optionally wrap into limiter.
|
|
@@ -90,18 +90,17 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
|
|
|
90
90
|
# Provide data caching.
|
|
91
91
|
cache_providers["sqlite"](cache_target, table_name=tgt_meta, data_it=tqdm(queries_it, desc="Iter content"))
|
|
92
92
|
|
|
93
|
-
return SQLiteProvider.
|
|
93
|
+
return SQLiteProvider.iter_rows(cache_target, table=cache_table)
|
|
94
94
|
|
|
95
95
|
|
|
96
96
|
if __name__ == '__main__':
|
|
97
97
|
|
|
98
98
|
parser = argparse.ArgumentParser(description="Infer Instruct LLM inference based on CoT schema")
|
|
99
99
|
parser.add_argument('--adapter', dest='adapter', type=str, default=None)
|
|
100
|
+
parser.add_argument('--id-col', dest='id_col', type=str, default="uid")
|
|
100
101
|
parser.add_argument('--src', dest='src', type=str, default=None)
|
|
101
102
|
parser.add_argument('--schema', dest='schema', type=str, default=None,
|
|
102
103
|
help="Path to the JSON file that describes schema")
|
|
103
|
-
parser.add_argument('--csv-sep', dest='csv_sep', type=str, default='\t')
|
|
104
|
-
parser.add_argument('--csv-escape-char', dest='csv_escape_char', type=str, default=None)
|
|
105
104
|
parser.add_argument('--to', dest='to', type=str, default=None, choices=["csv", "sqlite"])
|
|
106
105
|
parser.add_argument('--output', dest='output', type=str, default=None)
|
|
107
106
|
parser.add_argument('--limit', dest='limit', type=int, default=None,
|
|
@@ -114,7 +113,8 @@ if __name__ == '__main__':
|
|
|
114
113
|
args = parser.parse_args(args=native_args[1:])
|
|
115
114
|
|
|
116
115
|
# Initialize Large Language Model.
|
|
117
|
-
|
|
116
|
+
model_args_dict = CmdArgsService.args_to_dict(model_args)
|
|
117
|
+
llm, llm_model_name = init_llm(**model_args_dict)
|
|
118
118
|
|
|
119
119
|
# Setup schema.
|
|
120
120
|
schema = init_schema(args.schema)
|
|
@@ -123,9 +123,11 @@ if __name__ == '__main__':
|
|
|
123
123
|
|
|
124
124
|
input_providers = {
|
|
125
125
|
None: lambda _: chat_with_lm(llm, chain=schema.chain, model_name=llm_model_name),
|
|
126
|
-
"csv": lambda filepath: CsvService.read(target=filepath, row_id_key=
|
|
127
|
-
as_dict=True, skip_header=True,
|
|
128
|
-
|
|
126
|
+
"csv": lambda filepath: CsvService.read(target=filepath, row_id_key=args.id_col,
|
|
127
|
+
as_dict=True, skip_header=True,
|
|
128
|
+
delimiter=model_args_dict.get("delimiter", "\t"),
|
|
129
|
+
escapechar=model_args_dict.get("escapechar", None)),
|
|
130
|
+
"jsonl": lambda filepath: JsonService.read_lines(src=filepath, row_id_key=args.id_col)
|
|
129
131
|
}
|
|
130
132
|
|
|
131
133
|
output_providers = {
|
|
@@ -156,6 +158,7 @@ if __name__ == '__main__':
|
|
|
156
158
|
data_it = iter_content(input_dicts_iter=input_providers[src_ext](src_filepath),
|
|
157
159
|
schema=schema,
|
|
158
160
|
llm=llm,
|
|
161
|
+
id_column_name=args.id_col,
|
|
159
162
|
cache_target=cache_target,
|
|
160
163
|
cache_table=cache_table)
|
|
161
164
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: bulk_chain
|
|
3
|
-
Version: 0.24.0
|
|
3
|
+
Version: 0.24.1
|
|
4
4
|
Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
|
|
5
5
|
Home-page: https://github.com/nicolay-r/bulk-chain
|
|
6
6
|
Author: Nicolay Rusnachenko
|
|
@@ -17,9 +17,14 @@ Description-Content-Type: text/markdown
|
|
|
17
17
|
License-File: LICENSE
|
|
18
18
|
Requires-Dist: tqdm
|
|
19
19
|
|
|
20
|
-
# bulk-chain
|
|
20
|
+
# bulk-chain 0.24.1
|
|
21
21
|

|
|
22
22
|
[](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
|
|
23
|
+
[](https://x.com/nicolayr_/status/1847969224636961033)
|
|
24
|
+
|
|
25
|
+
<p align="center">
|
|
26
|
+
<img src="logo.png"/>
|
|
27
|
+
</p>
|
|
23
28
|
|
|
24
29
|
A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
|
|
25
30
|
It allows applying series of prompts formed into `schema` (See [related section](#chain-of-thought-schema))
|
|
@@ -33,7 +38,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
|
|
|
33
38
|
# Installation
|
|
34
39
|
|
|
35
40
|
```bash
|
|
36
|
-
pip install
|
|
41
|
+
pip install bulk-chain
|
|
37
42
|
```
|
|
38
43
|
|
|
39
44
|
## Chain-of-Thought Schema
|
|
@@ -15,7 +15,7 @@ def get_requirements(filenames):
|
|
|
15
15
|
|
|
16
16
|
setup(
|
|
17
17
|
name='bulk_chain',
|
|
18
|
-
version='0.24.0',
|
|
18
|
+
version='0.24.1',
|
|
19
19
|
python_requires=">=3.6",
|
|
20
20
|
description='A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, '
|
|
21
21
|
'ensuring reliable results for bulk input requests.',
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|