PyPI - bulk-chain - Versions diffs - 0.24.0__tar.gz → 0.24.1__tar.gz - Mend

bulk-chain 0.24.0__tar.gz → 0.24.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24)

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bulk_chain
-Version: 0.24.0
+Version: 0.24.1
 Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
 Home-page: https://github.com/nicolay-r/bulk-chain
 Author: Nicolay Rusnachenko
@@ -17,9 +17,14 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: tqdm
-# bulk-chain
+# bulk-chain 0.24.1
 ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
 [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
+[![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
+<p align="center">
+    <img src="logo.png"/>
+</p>
 A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
 It allows applying series of prompts formed into `schema` (See [related section](#chain-of-thought-schema))
@@ -33,7 +38,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
 # Installation
 ```bash
-pip install git+https://github.com/nicolay-r/bulk-chain@master
+pip install bulk-chain
 ```
 ## Chain-of-Thought Schema

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/README.md RENAMED Viewed

@@ -1,6 +1,11 @@
-# bulk-chain
+# bulk-chain 0.24.1
 ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
 [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
+[![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
+<p align="center">
+    <img src="logo.png"/>
+</p>
 A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
 It allows applying series of prompts formed into `schema` (See [related section](#chain-of-thought-schema))
@@ -14,7 +19,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
 # Installation
 ```bash
-pip install git+https://github.com/nicolay-r/bulk-chain@master
+pip install bulk-chain
 ```
 ## Chain-of-Thought Schema

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/provider_sqlite.py RENAMED Viewed

@@ -11,9 +11,9 @@ class SQLiteProvider(object):
         sqlite3_column_types = [id_column_type] + sqlite3_column_types
         # Compose the whole columns list.
-        content = ", ".join([" ".join(item) for item in zip(columns, sqlite3_column_types)])
+        content = ", ".join([f"[{item[0]}] {item[1]}" for item in zip(columns, sqlite3_column_types)])
         cur.execute(f"CREATE TABLE IF NOT EXISTS {table_name}({content})")
-        cur.execute(f"CREATE INDEX IF NOT EXISTS i_id ON {table_name}({id_column_name})")
+        cur.execute(f"CREATE INDEX IF NOT EXISTS [{id_column_name}] ON {table_name}([{id_column_name}])")
     @staticmethod
     def write_auto(data_it, target, data2col_func, table_name, id_column_name="id",
@@ -49,13 +49,13 @@ class SQLiteProvider(object):
                 [Exception(f"{column} is expected to be in row!") for column in row_columns if column not in columns]
                 uid = data[id_column_name]
-                r = cur.execute(f"SELECT EXISTS(SELECT 1 FROM {table_name} WHERE {id_column_name}='{uid}');")
+                r = cur.execute(f"SELECT EXISTS(SELECT 1 FROM {table_name} WHERE [{id_column_name}]='{uid}');")
                 ans = r.fetchone()[0]
                 if ans == 1:
                     continue
                 params = ", ".join(tuple(['?'] * (len(columns))))
-                row_columns_str = ", ".join(row_columns)
+                row_columns_str = ", ".join([f"[{col}]" for col in row_columns])
                 cur.execute(f"INSERT INTO {table_name}({row_columns_str}) VALUES ({params})",
                             [data2col_func(c, data) for c in row_columns])
                 con.commit()
@@ -63,11 +63,10 @@ class SQLiteProvider(object):
             cur.close()
     @staticmethod
-    def read(target, column_names=None, table="content"):
+    def iter_rows(target, table="content"):
         with sqlite3.connect(target) as conn:
             cursor = conn.cursor()
-            cols = "*" if column_names is None else ",".join(column_names)
-            cursor.execute(f"SELECT {cols} FROM {table}")
+            cursor.execute(f"SELECT * FROM {table}")
             for row in cursor:
                 yield row

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/infer.py RENAMED Viewed

@@ -48,7 +48,7 @@ def init_schema(json_filepath):
     return SchemaService(json_data=JsonService.read_data(json_filepath))
-def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
+def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table, id_column_name):
     """ This method represent Python API aimed at application of `llm` towards
         iterator of input_dicts via cache_target that refers to the SQLite using
         the given `schema`
@@ -79,7 +79,7 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
             data_it=data_it, target=filepath,
             data2col_func=optional_update_data_records,
             table_name=handle_table_name(table_name if table_name is not None else "contents"),
-            id_column_name="uid")
+            id_column_name=id_column_name)
     }
     # We optionally wrap into limiter.
@@ -90,18 +90,17 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
     # Provide data caching.
     cache_providers["sqlite"](cache_target, table_name=tgt_meta, data_it=tqdm(queries_it, desc="Iter content"))
-    return SQLiteProvider.read(cache_target, table=cache_table)
+    return SQLiteProvider.iter_rows(cache_target, table=cache_table)
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description="Infer Instruct LLM inference based on CoT schema")
     parser.add_argument('--adapter', dest='adapter', type=str, default=None)
+    parser.add_argument('--id-col', dest='id_col', type=str, default="uid")
     parser.add_argument('--src', dest='src', type=str, default=None)
     parser.add_argument('--schema', dest='schema', type=str, default=None,
                         help="Path to the JSON file that describes schema")
-    parser.add_argument('--csv-sep', dest='csv_sep', type=str, default='\t')
-    parser.add_argument('--csv-escape-char', dest='csv_escape_char', type=str, default=None)
     parser.add_argument('--to', dest='to', type=str, default=None, choices=["csv", "sqlite"])
     parser.add_argument('--output', dest='output', type=str, default=None)
     parser.add_argument('--limit', dest='limit', type=int, default=None,
@@ -114,7 +113,8 @@ if __name__ == '__main__':
     args = parser.parse_args(args=native_args[1:])
     # Initialize Large Language Model.
-    llm, llm_model_name = init_llm(**CmdArgsService.args_to_dict(model_args))
+    model_args_dict = CmdArgsService.args_to_dict(model_args)
+    llm, llm_model_name = init_llm(**model_args_dict)
     # Setup schema.
     schema = init_schema(args.schema)
@@ -123,9 +123,11 @@ if __name__ == '__main__':
     input_providers = {
         None: lambda _: chat_with_lm(llm, chain=schema.chain, model_name=llm_model_name),
-        "csv": lambda filepath: CsvService.read(target=filepath, row_id_key="uid", delimiter=args.csv_sep,
-                                                as_dict=True, skip_header=True, escapechar=args.csv_escape_char),
-        "jsonl": lambda filepath: JsonService.read_lines(src=filepath, row_id_key="uid")
+        "csv": lambda filepath: CsvService.read(target=filepath, row_id_key=args.id_col,
+                                                as_dict=True, skip_header=True,
+                                                delimiter=model_args_dict.get("delimiter", "\t"),
+                                                escapechar=model_args_dict.get("escapechar", None)),
+        "jsonl": lambda filepath: JsonService.read_lines(src=filepath, row_id_key=args.id_col)
     }
     output_providers = {
@@ -156,6 +158,7 @@ if __name__ == '__main__':
     data_it = iter_content(input_dicts_iter=input_providers[src_ext](src_filepath),
                            schema=schema,
                            llm=llm,
+                           id_column_name=args.id_col,
                            cache_target=cache_target,
                            cache_table=cache_table)

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: bulk_chain
-Version: 0.24.0
+Version: 0.24.1
 Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
 Home-page: https://github.com/nicolay-r/bulk-chain
 Author: Nicolay Rusnachenko
@@ -17,9 +17,14 @@ Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: tqdm
-# bulk-chain
+# bulk-chain 0.24.1
 ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
 [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
+[![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
+<p align="center">
+    <img src="logo.png"/>
+</p>
 A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
 It allows applying series of prompts formed into `schema` (See [related section](#chain-of-thought-schema))
@@ -33,7 +38,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
 # Installation
 ```bash
-pip install git+https://github.com/nicolay-r/bulk-chain@master
+pip install bulk-chain
 ```
 ## Chain-of-Thought Schema

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/setup.py RENAMED Viewed

@@ -15,7 +15,7 @@ def get_requirements(filenames):
 setup(
     name='bulk_chain',
-    version='0.24.0',
+    version='0.24.1',
     python_requires=">=3.6",
     description='A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, '
                 'ensuring reliable results for bulk input requests.',

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/LICENSE RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/__init__.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/__init__.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/llm_base.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_args.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_csv.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_data.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_json.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_llm.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/service_schema.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain/core/utils.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/requires.txt RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/bulk_chain.egg-info/top_level.txt RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/setup.cfg RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/test/test_args_seeking.py RENAMED Viewed

File without changes

{bulk_chain-0.24.0 → bulk_chain-0.24.1}/test/test_cmdargs.py RENAMED Viewed

File without changes

bulk-chain 0.24.0__tar.gz → 0.24.1__tar.gz

bulk-chain 0.24.0__tar.gz → 0.24.1__tar.gz