bulk-chain 0.24.0__py3-none-any.whl → 0.24.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -11,9 +11,9 @@ class SQLiteProvider(object):
11
11
  sqlite3_column_types = [id_column_type] + sqlite3_column_types
12
12
 
13
13
  # Compose the whole columns list.
14
- content = ", ".join([" ".join(item) for item in zip(columns, sqlite3_column_types)])
14
+ content = ", ".join([f"[{item[0]}] {item[1]}" for item in zip(columns, sqlite3_column_types)])
15
15
  cur.execute(f"CREATE TABLE IF NOT EXISTS {table_name}({content})")
16
- cur.execute(f"CREATE INDEX IF NOT EXISTS i_id ON {table_name}({id_column_name})")
16
+ cur.execute(f"CREATE INDEX IF NOT EXISTS [{id_column_name}] ON {table_name}([{id_column_name}])")
17
17
 
18
18
  @staticmethod
19
19
  def write_auto(data_it, target, data2col_func, table_name, id_column_name="id",
@@ -49,13 +49,13 @@ class SQLiteProvider(object):
49
49
  [Exception(f"{column} is expected to be in row!") for column in row_columns if column not in columns]
50
50
 
51
51
  uid = data[id_column_name]
52
- r = cur.execute(f"SELECT EXISTS(SELECT 1 FROM {table_name} WHERE {id_column_name}='{uid}');")
52
+ r = cur.execute(f"SELECT EXISTS(SELECT 1 FROM {table_name} WHERE [{id_column_name}]='{uid}');")
53
53
  ans = r.fetchone()[0]
54
54
  if ans == 1:
55
55
  continue
56
56
 
57
57
  params = ", ".join(tuple(['?'] * (len(columns))))
58
- row_columns_str = ", ".join(row_columns)
58
+ row_columns_str = ", ".join([f"[{col}]" for col in row_columns])
59
59
  cur.execute(f"INSERT INTO {table_name}({row_columns_str}) VALUES ({params})",
60
60
  [data2col_func(c, data) for c in row_columns])
61
61
  con.commit()
@@ -63,11 +63,10 @@ class SQLiteProvider(object):
63
63
  cur.close()
64
64
 
65
65
  @staticmethod
66
- def read(target, column_names=None, table="content"):
66
+ def iter_rows(target, table="content"):
67
67
  with sqlite3.connect(target) as conn:
68
68
  cursor = conn.cursor()
69
- cols = "*" if column_names is None else ",".join(column_names)
70
- cursor.execute(f"SELECT {cols} FROM {table}")
69
+ cursor.execute(f"SELECT * FROM {table}")
71
70
  for row in cursor:
72
71
  yield row
73
72
 
bulk_chain/infer.py CHANGED
@@ -48,7 +48,7 @@ def init_schema(json_filepath):
48
48
  return SchemaService(json_data=JsonService.read_data(json_filepath))
49
49
 
50
50
 
51
- def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
51
+ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table, id_column_name):
52
52
  """ This method represents the Python API aimed at application of `llm` towards
53
53
  an iterator of input_dicts via cache_target that refers to the SQLite database using
54
54
  the given `schema`
@@ -79,7 +79,7 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
79
79
  data_it=data_it, target=filepath,
80
80
  data2col_func=optional_update_data_records,
81
81
  table_name=handle_table_name(table_name if table_name is not None else "contents"),
82
- id_column_name="uid")
82
+ id_column_name=id_column_name)
83
83
  }
84
84
 
85
85
  # We optionally wrap into limiter.
@@ -90,18 +90,17 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
90
90
  # Provide data caching.
91
91
  cache_providers["sqlite"](cache_target, table_name=tgt_meta, data_it=tqdm(queries_it, desc="Iter content"))
92
92
 
93
- return SQLiteProvider.read(cache_target, table=cache_table)
93
+ return SQLiteProvider.iter_rows(cache_target, table=cache_table)
94
94
 
95
95
 
96
96
  if __name__ == '__main__':
97
97
 
98
98
  parser = argparse.ArgumentParser(description="Infer Instruct LLM inference based on CoT schema")
99
99
  parser.add_argument('--adapter', dest='adapter', type=str, default=None)
100
+ parser.add_argument('--id-col', dest='id_col', type=str, default="uid")
100
101
  parser.add_argument('--src', dest='src', type=str, default=None)
101
102
  parser.add_argument('--schema', dest='schema', type=str, default=None,
102
103
  help="Path to the JSON file that describes schema")
103
- parser.add_argument('--csv-sep', dest='csv_sep', type=str, default='\t')
104
- parser.add_argument('--csv-escape-char', dest='csv_escape_char', type=str, default=None)
105
104
  parser.add_argument('--to', dest='to', type=str, default=None, choices=["csv", "sqlite"])
106
105
  parser.add_argument('--output', dest='output', type=str, default=None)
107
106
  parser.add_argument('--limit', dest='limit', type=int, default=None,
@@ -114,7 +113,8 @@ if __name__ == '__main__':
114
113
  args = parser.parse_args(args=native_args[1:])
115
114
 
116
115
  # Initialize Large Language Model.
117
- llm, llm_model_name = init_llm(**CmdArgsService.args_to_dict(model_args))
116
+ model_args_dict = CmdArgsService.args_to_dict(model_args)
117
+ llm, llm_model_name = init_llm(**model_args_dict)
118
118
 
119
119
  # Setup schema.
120
120
  schema = init_schema(args.schema)
@@ -123,9 +123,11 @@ if __name__ == '__main__':
123
123
 
124
124
  input_providers = {
125
125
  None: lambda _: chat_with_lm(llm, chain=schema.chain, model_name=llm_model_name),
126
- "csv": lambda filepath: CsvService.read(target=filepath, row_id_key="uid", delimiter=args.csv_sep,
127
- as_dict=True, skip_header=True, escapechar=args.csv_escape_char),
128
- "jsonl": lambda filepath: JsonService.read_lines(src=filepath, row_id_key="uid")
126
+ "csv": lambda filepath: CsvService.read(target=filepath, row_id_key=args.id_col,
127
+ as_dict=True, skip_header=True,
128
+ delimiter=model_args_dict.get("delimiter", "\t"),
129
+ escapechar=model_args_dict.get("escapechar", None)),
130
+ "jsonl": lambda filepath: JsonService.read_lines(src=filepath, row_id_key=args.id_col)
129
131
  }
130
132
 
131
133
  output_providers = {
@@ -156,6 +158,7 @@ if __name__ == '__main__':
156
158
  data_it = iter_content(input_dicts_iter=input_providers[src_ext](src_filepath),
157
159
  schema=schema,
158
160
  llm=llm,
161
+ id_column_name=args.id_col,
159
162
  cache_target=cache_target,
160
163
  cache_table=cache_table)
161
164
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bulk_chain
3
- Version: 0.24.0
3
+ Version: 0.24.1
4
4
  Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
5
5
  Home-page: https://github.com/nicolay-r/bulk-chain
6
6
  Author: Nicolay Rusnachenko
@@ -17,9 +17,14 @@ Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: tqdm
19
19
 
20
- # bulk-chain
20
+ # bulk-chain 0.24.1
21
21
  ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
22
22
  [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
23
+ [![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
24
+
25
+ <p align="center">
26
+ <img src="logo.png"/>
27
+ </p>
23
28
 
24
29
  A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
25
30
  It allows applying a series of prompts formed into a `schema` (see [related section](#chain-of-thought-schema))
@@ -33,7 +38,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
33
38
  # Installation
34
39
 
35
40
  ```bash
36
- pip install git+https://github.com/nicolay-r/bulk-chain@master
41
+ pip install bulk-chain
37
42
  ```
38
43
 
39
44
  ## Chain-of-Thought Schema
@@ -1,8 +1,8 @@
1
1
  bulk_chain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- bulk_chain/infer.py,sha256=HXFcl_7u5sgybDv_v5_up-Mpe-zSX0vtgsG1Wh1h-UA,7184
2
+ bulk_chain/infer.py,sha256=hD9GJEp6P9PZRBSUCIxK8DaDjsX-oiq8VCe0rAD2EPs,7366
3
3
  bulk_chain/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  bulk_chain/core/llm_base.py,sha256=5js2RJLpNS5t-De-xTpZCbLMgbz3F_b9tU_CtXhy02I,259
5
- bulk_chain/core/provider_sqlite.py,sha256=D7axdeTDvv-ULHKTalFWbeKC3WaYOLI7lVrXFAXkct8,3213
5
+ bulk_chain/core/provider_sqlite.py,sha256=rNUvBt3aGa6Uv4a9RItyMgBZPnFbBdNjnt0Gw81lM3I,3171
6
6
  bulk_chain/core/service_args.py,sha256=Qr3rHsAB8wnajB-DbU-GjiEpRZFP4D6s1lVTpLkPPX4,1294
7
7
  bulk_chain/core/service_csv.py,sha256=-m8tNN9aIqRfJa4sPUX8ZUDP4W0fgnnOR3_0PapepDY,1984
8
8
  bulk_chain/core/service_data.py,sha256=18gQwSCTEsI7XFukq8AE5lDJX_QQRpasaH69g6EddV0,797
@@ -10,8 +10,8 @@ bulk_chain/core/service_json.py,sha256=alYqTQbBjAcCh7anSTOZs1CLJbiWrLPpzLcoADstD
10
10
  bulk_chain/core/service_llm.py,sha256=tYgMphJkXunhxdrThdfI4eM8qQTCZfEM1kabbReVjuQ,2726
11
11
  bulk_chain/core/service_schema.py,sha256=JVhOv2YP5VEtiwOq_zgCzhS2uF_BOATAgg6fmKRf2NQ,1209
12
12
  bulk_chain/core/utils.py,sha256=UV6Cefaw7yZiYblsCr-s9LsbcI83xe7eESBvha9A2Og,2784
13
- bulk_chain-0.24.0.dist-info/LICENSE,sha256=VF9SjNpwwSSFEY_eP_8A1ocDCrbwfjI1pZexXdCkOwo,1076
14
- bulk_chain-0.24.0.dist-info/METADATA,sha256=l_RpSlOGQzuA0buVn7I54XN_c9Fn_5Y6lhNPkqlhYqo,3496
15
- bulk_chain-0.24.0.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
16
- bulk_chain-0.24.0.dist-info/top_level.txt,sha256=Hxq_wyH-GDXKBaA63UfBIiMJO2eCHJG5EOrXDphpeB4,11
17
- bulk_chain-0.24.0.dist-info/RECORD,,
13
+ bulk_chain-0.24.1.dist-info/LICENSE,sha256=VF9SjNpwwSSFEY_eP_8A1ocDCrbwfjI1pZexXdCkOwo,1076
14
+ bulk_chain-0.24.1.dist-info/METADATA,sha256=g5_Sr1pfa8v5lRs0sd7Ldch-uLiV_KfdDXaTHSen-R4,3649
15
+ bulk_chain-0.24.1.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
16
+ bulk_chain-0.24.1.dist-info/top_level.txt,sha256=Hxq_wyH-GDXKBaA63UfBIiMJO2eCHJG5EOrXDphpeB4,11
17
+ bulk_chain-0.24.1.dist-info/RECORD,,