bulk-chain 0.25.0__py3-none-any.whl → 0.25.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
bulk_chain/api.py CHANGED
@@ -7,7 +7,7 @@ from bulk_chain.core.service_data import DataService
7
7
  from bulk_chain.core.service_dict import DictionaryService
8
8
  from bulk_chain.core.service_json import JsonService
9
9
  from bulk_chain.core.service_schema import SchemaService
10
-
10
+ from bulk_chain.core.utils import dynamic_init, find_by_prefix
11
11
 
12
12
  INFER_MODES = {
13
13
  "default": lambda llm, prompt, limit_prompt=None: llm.ask_core(
@@ -76,4 +76,24 @@ def iter_content(input_dicts_it, llm, schema, batch_size=1, return_batch=True, l
76
76
  schema=schema)
77
77
  for batch in BatchIterator(prompts_it, batch_size=batch_size))
78
78
 
79
- yield from content_it if return_batch else chain.from_iterable(content_it)
79
+ yield from content_it if return_batch else chain.from_iterable(content_it)
80
+
81
+
82
+ def init_llm(adapter, **model_kwargs):
83
+ """ This method perform dynamic initialization of LLM from third-party resource.
84
+ """
85
+
86
+ # List of the Supported models and their API wrappers.
87
+ models_preset = {
88
+ "dynamic": lambda: dynamic_init(class_dir=CWD, class_filepath=llm_model_name,
89
+ class_name=llm_model_params)(**model_kwargs)
90
+ }
91
+
92
+ # Initialize LLM model.
93
+ params = adapter.split(':')
94
+ llm_model_type = params[0]
95
+ llm_model_name = params[1] if len(params) > 1 else params[-1]
96
+ llm_model_params = ':'.join(params[2:]) if len(params) > 2 else None
97
+ llm = find_by_prefix(d=models_preset, key=llm_model_type)()
98
+
99
+ return llm, llm_model_name
@@ -24,7 +24,7 @@ class BaseLM(object):
24
24
  try:
25
25
  if self.__support_batching:
26
26
  # Launch in batch mode.
27
- content = self.ask(batch)
27
+ content = batch
28
28
  else:
29
29
  # Launch in non-batch mode.
30
30
  assert len(batch) == 1, "The LM does not support batching," \
@@ -12,6 +12,11 @@ class CmdArgsService:
12
12
  def iter_arguments(lst):
13
13
 
14
14
  def __release():
15
+
16
+ # We use the True value by default to treat the related parameter as flag.
17
+ if len(buf) == 0:
18
+ buf.append(True)
19
+
15
20
  return key, buf if len(buf) > 1 else buf[0]
16
21
 
17
22
  key = None
@@ -29,7 +34,7 @@ class CmdArgsService:
29
34
  buf.append(a)
30
35
 
31
36
  # Sharing the remaining params.
32
- if len(buf) > 0:
37
+ if key is not None:
33
38
  yield __release()
34
39
 
35
40
  @staticmethod
@@ -1,51 +1,44 @@
1
- import logging
2
-
3
1
  from bulk_chain.core.llm_base import BaseLM
4
2
  from bulk_chain.core.service_data import DataService
5
3
  from bulk_chain.core.utils import iter_params
4
+ from bulk_chain.core.utils_logger import StreamedLogger
6
5
 
7
6
 
8
7
  def pad_str(text, pad):
9
8
  return text.rjust(len(text) + pad, ' ')
10
9
 
11
10
 
12
- def text_wrap(content, width, handle_line=lambda l: l):
13
- lines = []
14
- for text in content.split('\n'):
15
- for i in range(0, len(text), width):
16
- line = handle_line(text[i:i + width])
17
- lines.append(line)
18
- return '\n'.join(lines)
19
-
20
-
21
- def nice_output(text, width, pad=4, remove_new_line=False):
11
+ def nice_output(text, remove_new_line=False):
22
12
  short_text = text.replace("\n", "") if remove_new_line else text
23
- return text_wrap(content=short_text, width=width, handle_line=lambda line: pad_str(line, pad=pad))
13
+ return short_text
24
14
 
25
15
 
26
- def chat_with_lm(lm, chain=None, model_name=None):
16
+ def chat_with_lm(lm, preset_dict=None, chain=None, model_name=None, pad=0):
27
17
  assert (isinstance(lm, BaseLM))
28
18
  assert (isinstance(chain, list))
29
19
  assert (isinstance(model_name, str) or model_name is None)
30
20
 
31
- logger = logging.getLogger(__name__)
32
- logging.basicConfig(level=logging.INFO)
21
+ preset_dict = {} if preset_dict is None else preset_dict
22
+
23
+ streamed_logger = StreamedLogger(__name__)
33
24
 
34
25
  do_exit = False
35
26
  model_name = model_name if model_name is not None else "agent"
36
27
 
37
28
  while not do_exit:
38
29
 
39
- logger.info("----------------")
30
+ streamed_logger.info("----------------")
31
+ streamed_logger.info("\n")
40
32
 
41
33
  # Launching the CoT engine loop.
42
- data_dict = {}
43
- for prompt_args in chain:
34
+ data_dict = {} | preset_dict
35
+ for chain_ind, prompt_args in enumerate(chain):
44
36
 
45
37
  # Processing the prompt.
46
38
  prompt = prompt_args["prompt"]
47
39
 
48
40
  # Filling necessary parameters.
41
+ user_informed = False
49
42
  field_names = list(iter_params(prompt))
50
43
  for ind, f_name in enumerate(field_names):
51
44
 
@@ -54,6 +47,7 @@ def chat_with_lm(lm, chain=None, model_name=None):
54
47
 
55
48
  user_input = input(f"Enter your prompt for `{f_name}` ({ind+1}/{len(field_names)}) "
56
49
  f"(or 'exit' to quit): ")
50
+ user_informed = True
57
51
 
58
52
  if user_input.lower() == 'exit':
59
53
  do_exit = True
@@ -64,19 +58,37 @@ def chat_with_lm(lm, chain=None, model_name=None):
64
58
  if do_exit:
65
59
  break
66
60
 
61
+ # In the case of the initial interaction with the chain.
62
+ # we make sure that aware user for starting interaction.
63
+ if chain_ind == 0 and not user_informed:
64
+ user_input = input(f"Enter to continue (or 'exit' to quit) ...")
65
+ if user_input.lower() == 'exit':
66
+ do_exit = True
67
+
67
68
  # Finally asking LLM.
68
69
  DataService.compose_prompt_text(prompt=prompt, data_dict=data_dict, field_names=field_names)
69
70
  actual_prompt = DataService.get_prompt_text(prompt=prompt, data_dict=data_dict)
70
71
 
71
72
  # Returning meta information, passed to LLM.
72
- pad = 4
73
- logger.info(pad_str(f"{model_name} (ask) ->", pad=pad))
74
- logger.info(nice_output(actual_prompt, pad=pad*2, remove_new_line=True, width=80))
73
+ streamed_logger.info(pad_str(f"{model_name} (ask [{chain_ind+1}/{len(chain)}]) ->", pad=pad))
74
+ streamed_logger.info("\n")
75
+ streamed_logger.info(nice_output(actual_prompt, remove_new_line=True))
76
+ streamed_logger.info("\n\n")
75
77
 
76
78
  # Response.
77
- response_batch = lm.ask_core(batch=[actual_prompt])
78
- logger.info(pad_str(f"{model_name} (resp)->", pad=pad))
79
- logger.info(nice_output(response_batch[0], pad=pad * 2, remove_new_line=False, width=80))
79
+ response = lm.ask_core(batch=[actual_prompt])[0]
80
+ streamed_logger.info(pad_str(f"{model_name} (resp [{chain_ind+1}/{len(chain)}])->", pad=pad))
81
+ streamed_logger.info("\n")
82
+ if isinstance(response, str):
83
+ streamed_logger.info(nice_output(response, remove_new_line=False))
84
+ buffer = [response]
85
+ else:
86
+ buffer = []
87
+ for chunk in response:
88
+ streamed_logger.info(chunk)
89
+ buffer.append(str(chunk))
90
+
91
+ streamed_logger.info("\n\n")
80
92
 
81
93
  # Collecting the answer for the next turn.
82
- data_dict[prompt_args["out"]] = response_batch[0]
94
+ data_dict[prompt_args["out"]] = "".join(buffer)
@@ -0,0 +1,41 @@
1
+ import logging
2
+
3
+
4
+ def StreamedLogger(name: str) -> logging.Logger:
5
+ """ https://medium.com/@r.das699/optimizing-logging-practices-for-streaming-data-in-python-521798e1ed82
6
+ """
7
+ root_handlers = logging.getLogger().handlers
8
+ current_logger = logging.getLogger(name)
9
+ if not root_handlers:
10
+ new_handler = logging.StreamHandler()
11
+ new_handler.terminator = ""
12
+ new_handler.setFormatter(logging.Formatter("%(message)s"))
13
+ current_logger.addHandler(new_handler)
14
+ current_logger.propagate = False
15
+ current_logger.setLevel(logging.INFO)
16
+ return current_logger
17
+
18
+ for handler in current_logger.handlers[:]:
19
+ current_logger.removeHandler(handler)
20
+
21
+ for handler_r in root_handlers:
22
+ if type(handler_r) is logging.StreamHandler:
23
+ new_handler = logging.StreamHandler()
24
+ new_handler.terminator = ""
25
+ new_handler.setFormatter(logging.Formatter("%(message)s"))
26
+ current_logger.addHandler(new_handler)
27
+ elif type(handler_r) is logging.FileHandler:
28
+ new_handler = logging.FileHandler(
29
+ handler_r.baseFilename,
30
+ handler_r.mode,
31
+ handler_r.encoding,
32
+ handler_r.delay,
33
+ handler_r.errors,
34
+ )
35
+ new_handler.terminator = "" # This will stop the printing in new line
36
+ new_handler.setFormatter(logging.Formatter("%(message)s"))
37
+ current_logger.addHandler(new_handler)
38
+ else:
39
+ continue
40
+ current_logger.propagate = False # Don't propagate to root logger
41
+ return current_logger
bulk_chain/demo.py ADDED
@@ -0,0 +1,84 @@
1
+ import json
2
+
3
+ import argparse
4
+ import logging
5
+ import sys
6
+
7
+ from source_iter.service_jsonl import JsonlService
8
+
9
+ from bulk_chain.api import init_llm
10
+ from bulk_chain.core.service_args import CmdArgsService
11
+ from bulk_chain.core.service_json import JsonService
12
+ from bulk_chain.core.service_llm import chat_with_lm
13
+ from bulk_chain.core.service_schema import SchemaService
14
+ from bulk_chain.core.utils import parse_filepath
15
+
16
+ logger = logging.getLogger(__name__)
17
+ logging.basicConfig(level=logging.INFO)
18
+
19
+
20
+ def iter_from_json(filepath):
21
+ with open(filepath, "r") as f:
22
+ content = json.load(f)
23
+ for key, value in content.items():
24
+ yield key, value
25
+
26
+
27
+ def iter_from_text_file(filepath):
28
+ with open(filepath, "r") as f:
29
+ yield filepath.split('.')[0], f.read()
30
+
31
+
32
+ if __name__ == '__main__':
33
+
34
+ parser = argparse.ArgumentParser(description="LLM demo usage based on CoT schema")
35
+ parser.add_argument('--adapter', dest='adapter', type=str, default=None)
36
+ parser.add_argument('--attempts', dest='attempts', type=int, default=None)
37
+ parser.add_argument('--src', dest='src', type=str, nargs="*", default=None)
38
+ parser.add_argument('--schema', dest='schema', type=str, default=None,
39
+ help="Path to the JSON file that describes schema")
40
+ parser.add_argument('--limit-prompt', dest="limit_prompt", type=int, default=None,
41
+ help="Optional trimming prompt by the specified amount of characters.")
42
+
43
+ # Extract native arguments.
44
+ native_args = CmdArgsService.extract_native_args(sys.argv, end_prefix="%%")
45
+ args = parser.parse_args(args=native_args[1:])
46
+
47
+ # Extract model-related arguments and Initialize Large Language Model.
48
+ model_args = CmdArgsService.find_grouped_args(lst=sys.argv, starts_with="%%m", end_prefix="%%")
49
+ model_args_dict = CmdArgsService.args_to_dict(model_args) | {"attempts": args.attempts}
50
+ llm, llm_model_name = init_llm(adapter=args.adapter, **model_args_dict)
51
+
52
+ # Setup schema.
53
+ schema = SchemaService(json_data=JsonService.read(args.schema))
54
+ schema_name = schema.src.get("name", None)
55
+ if schema is not None:
56
+ logger.info(f"Using schema: {schema_name}")
57
+
58
+ output_providers = {
59
+ "jsonl": lambda filepath, data_it, header:
60
+ JsonlService.write(target=filepath,
61
+ data_it=map(lambda item: {key: item[i] for i, key in enumerate(header)}, data_it))
62
+ }
63
+
64
+ input_file_handlers = {
65
+ "json": lambda filepath: iter_from_json(filepath),
66
+ "txt": lambda filepath: iter_from_text_file(filepath)
67
+ }
68
+
69
+ # Input extension type defines the provider.
70
+ if args.src is None:
71
+ args.src = []
72
+ if isinstance(args.src, str):
73
+ args.src = [args.src]
74
+ sources = [parse_filepath(s) for s in args.src]
75
+
76
+ preset_dict = {}
77
+ for fp, ext, _ in sources:
78
+ for key, value in input_file_handlers[ext](fp):
79
+ if key in preset_dict:
80
+ raise Exception(f"While at handling {fp}: Key {key} is already registered!")
81
+ preset_dict[key] = value
82
+
83
+ # Launch Demo.
84
+ chat_with_lm(llm, preset_dict=preset_dict, chain=schema.chain, model_name=llm_model_name)
bulk_chain/infer.py CHANGED
@@ -9,51 +9,28 @@ from source_iter.service_jsonl import JsonlService
9
9
  from source_iter.service_sqlite import SQLite3Service
10
10
  from tqdm import tqdm
11
11
 
12
- from bulk_chain.api import INFER_MODES, _infer_batch, CWD
12
+ from bulk_chain.api import INFER_MODES, _infer_batch, CWD, init_llm
13
13
  from bulk_chain.core.llm_base import BaseLM
14
14
  from bulk_chain.core.service_args import CmdArgsService
15
15
  from bulk_chain.core.service_dict import DictionaryService
16
16
  from bulk_chain.core.service_json import JsonService
17
- from bulk_chain.core.service_llm import chat_with_lm
18
17
  from bulk_chain.core.service_schema import SchemaService
19
- from bulk_chain.core.utils import dynamic_init, find_by_prefix, handle_table_name, optional_limit_iter, parse_filepath
18
+ from bulk_chain.core.utils import handle_table_name, optional_limit_iter, parse_filepath
20
19
 
21
20
  logger = logging.getLogger(__name__)
22
21
  logging.basicConfig(level=logging.INFO)
23
22
 
24
-
25
23
  WRITER_PROVIDERS = {
26
24
  "sqlite": lambda filepath, table_name, data_it, infer_data_func, **kwargs: SQLite3Service.write(
27
25
  data_it=data_it, target=filepath, table_name=table_name, data2col_func=infer_data_func,
28
26
  skip_existed=True, **kwargs)
29
27
  }
30
28
 
31
-
32
29
  READER_PROVIDERS = {
33
30
  "sqlite": lambda filepath, table_name: SQLite3Service.read(filepath, table=table_name)
34
31
  }
35
32
 
36
33
 
37
- def init_llm(**model_kwargs):
38
- """ This method perform dynamic initialization of LLM from third-party resource.
39
- """
40
-
41
- # List of the Supported models and their API wrappers.
42
- models_preset = {
43
- "dynamic": lambda: dynamic_init(class_dir=CWD, class_filepath=llm_model_name,
44
- class_name=llm_model_params)(**model_kwargs)
45
- }
46
-
47
- # Initialize LLM model.
48
- params = args.adapter.split(':')
49
- llm_model_type = params[0]
50
- llm_model_name = params[1] if len(params) > 1 else params[-1]
51
- llm_model_params = ':'.join(params[2:]) if len(params) > 2 else None
52
- llm = find_by_prefix(d=models_preset, key=llm_model_type)()
53
-
54
- return llm, llm_model_name
55
-
56
-
57
34
  def iter_content_cached(input_dicts_it, llm, schema, cache_target, limit_prompt=None, **cache_kwargs):
58
35
  assert (isinstance(llm, BaseLM))
59
36
  assert (isinstance(cache_target, str))
@@ -91,9 +68,8 @@ if __name__ == '__main__':
91
68
 
92
69
  parser = argparse.ArgumentParser(description="Infer Instruct LLM inference based on CoT schema")
93
70
  parser.add_argument('--adapter', dest='adapter', type=str, default=None)
94
- parser.add_argument('--attempts', dest='attempts', type=int, default=None)
95
71
  parser.add_argument('--id-col', dest='id_col', type=str, default="uid")
96
- parser.add_argument('--src', dest='src', type=str, default=None)
72
+ parser.add_argument('--src', dest='src', type=str, nargs="?", default=None)
97
73
  parser.add_argument('--schema', dest='schema', type=str, default=None,
98
74
  help="Path to the JSON file that describes schema")
99
75
  parser.add_argument('--to', dest='to', type=str, default=None, choices=["csv", "sqlite"])
@@ -114,7 +90,7 @@ if __name__ == '__main__':
114
90
  # Extract model-related arguments and Initialize Large Language Model.
115
91
  model_args = CmdArgsService.find_grouped_args(lst=sys.argv, starts_with="%%m", end_prefix="%%")
116
92
  model_args_dict = CmdArgsService.args_to_dict(model_args) | {"attempts": args.attempts}
117
- llm, llm_model_name = init_llm(**model_args_dict)
93
+ llm, llm_model_name = init_llm(adapter=args.adapter, **model_args_dict)
118
94
 
119
95
  # Setup schema.
120
96
  schema = SchemaService(json_data=JsonService.read(args.schema))
@@ -123,7 +99,6 @@ if __name__ == '__main__':
123
99
  logger.info(f"Using schema: {schema_name}")
124
100
 
125
101
  input_providers = {
126
- None: lambda _: chat_with_lm(llm, chain=schema.chain, model_name=llm_model_name),
127
102
  "csv": lambda filepath: CsvService.read(src=filepath, row_id_key=args.id_col,
128
103
  as_dict=True, skip_header=True,
129
104
  delimiter=csv_args_dict.get("delimiter", ","),
@@ -155,14 +130,9 @@ if __name__ == '__main__':
155
130
  args.output = args.output.format(model=llm.name()) if args.output is not None else args.output
156
131
  tgt_filepath, tgt_ext, tgt_meta = parse_filepath(args.output, default_ext=args.to)
157
132
 
158
- # Input extension type defines the provider.
133
+ # We do not support multiple files for other modes.
159
134
  src_filepath, src_ext, src_meta = parse_filepath(args.src)
160
135
 
161
- # Check whether we are in chat mode.
162
- if src_ext is None:
163
- input_providers[src_ext](None)
164
- exit(0)
165
-
166
136
  def default_output_file_template(ext):
167
137
  # This is a default template for output files to be generated.
168
138
  return "".join(["_".join([join(CWD, basename(src_filepath)), llm.name(), schema_name]), ext])
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bulk_chain
3
- Version: 0.25.0
3
+ Version: 0.25.2
4
4
  Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
5
5
  Home-page: https://github.com/nicolay-r/bulk-chain
6
6
  Author: Nicolay Rusnachenko
@@ -15,8 +15,10 @@ Classifier: Topic :: Text Processing :: Linguistic
15
15
  Requires-Python: >=3.6
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
+ Requires-Dist: tqdm
19
+ Requires-Dist: source-iter ==0.24.3
18
20
 
19
- # bulk-chain 0.25.0
21
+ # bulk-chain 0.25.2
20
22
  ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
21
23
  [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
22
24
  [![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
@@ -26,7 +28,13 @@ License-File: LICENSE
26
28
  <img src="logo.png"/>
27
29
  </p>
28
30
 
29
- A lightweight, no-strings-attached **framework** for your LLM that allows applying [Chain-of-Thought](https://arxiv.org/abs/2201.11903) prompt `schema` (See [related section](#chain-of-thought-schema)) towards a massive textual collections.
31
+ <p align="center">
32
+ <a href="https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm"><b>Third-party providers hosting</b>↗️</a>
33
+ <br>
34
+ <a href="https://github.com/nicolay-r/bulk-chain/blob/master/README.md#demo-mode">👉<b>demo</b>👈</a>
35
+ </p>
36
+
37
+ A no-strings-attached **framework** for your LLM that allows applying a Chain-of-Thought-like [prompt `schema`](#chain-of-thought-schema) to massive textual collections using custom **[third-party providers ↗️](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm)**.
30
38
 
31
39
  ### Main Features
32
40
  * ✅ **No-strings**: you're free to LLM dependencies and flexible `venv` customization.
@@ -42,7 +50,7 @@ A lightweight, no-strings-attached **framework** for your LLM that allows apply
42
50
  From PyPI:
43
51
 
44
52
  ```bash
45
- pip install bulk-chain
53
+ pip install --no-deps bulk-chain
46
54
  ```
47
55
 
48
56
  or latest version from here:
@@ -73,37 +81,67 @@ Below, is an example on how to declare your own schema:
73
81
  }
74
82
  ```
75
83
 
76
- Another templates are available [here](/ext/schema/).
77
-
78
84
  # Usage
79
85
 
80
86
  Preliminary steps:
81
87
 
82
88
  1. Define your [schema](#chain-of-thought-schema) ([Example for Sentiment Analysis](/ext/schema/thor_cot_schema.json))
83
- 2. Wrap or pick **LLM model** from the [list of presets](/ext/).
89
+ 2. Wrap or pick **LLM model** from the [<b>Third-party providers hosting</b>↗️](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm).
84
90
 
85
- ## API
91
+ ## Shell
86
92
 
87
- Please take a look at the [**related Wiki page**](https://github.com/nicolay-r/bulk-chain/wiki)
93
+ ### Demo Mode
94
+
95
+ **Demo mode** lets you interact with an LLM via the command line, with support for streaming the LLM output.
96
+ The video below illustrates an example application: sentiment analysis that extracts the author's opinion towards an object mentioned in the text.
97
+
98
+ Quick start for launching the demo:
99
+ 1. ⬇️ Download [replicate](https://replicate.com/) provider for `bulk-chain`:
100
+ 2. 📜 Setup your reasoning `thor_cot_schema.json` according to the [following example ↗️](test/schema/thor_cot_schema.json)
101
+ 3. 🚀 Launch `demo.py` as follows:
102
+ ```bash
103
+ python3 -m bulk_chain.demo \
104
+ --schema "test/schema/thor_cot_schema.json" \
105
+ --adapter "dynamic:replicate_104.py:Replicate" \
106
+ %%m \
107
+ --model_name "meta/meta-llama-3-70b-instruct" \
108
+ --api_token "<REPLICATE-API-TOKEN>" \
109
+ --stream
110
+ ```
111
+
112
+ 📺 This video showcases the application of the [↗️ Sentiment Analysis Schema](https://github.com/nicolay-r/bulk-chain/blob/master/test/schema/thor_cot_schema.json) to [LLaMA-3-70B-Instruct](https://replicate.com/meta/meta-llama-3-70b-instruct), hosted by Replicate, for reasoning over submitted texts
113
+ ![sa-bulk-chain-cot-final](https://github.com/user-attachments/assets/0cc8fdcb-6ddb-44a3-8f05-d76250ae6423)
88
114
 
89
- ## Shell
90
115
 
91
- > **NOTE:** You have to install `source-iter` package
116
+ ### Inference Mode
92
117
 
118
+ > **NOTE:** You have to install the `source-iter` and `tqdm` packages, which are the actual [dependencies](dependencies.txt) of this project
119
+
120
+ 1. ⬇️ Download [replicate](https://replicate.com/) provider for `bulk-chain`:
121
+ ```bash
122
+ wget https://raw.githubusercontent.com/nicolay-r/nlp-thirdgate/refs/heads/master/llm/replicate_104.py
123
+ ```
124
+ 2. 📜 Setup your reasoning `schema.json` according to the [following example ↗️](test/schema/default.json)
125
+ 3. 🚀 Launch inference using `DeepSeek-R1`:
93
126
  ```bash
94
127
  python3 -m bulk_chain.infer \
95
128
  --src "<PATH-TO-YOUR-CSV-or-JSONL>" \
96
- --schema "ext/schema/default.json" \
97
- --adapter "dynamic:ext/replicate.py:Replicate" \
129
+ --schema "test/schema/default.json" \
130
+ --adapter "replicate_104.py:Replicate" \
98
131
  %%m \
99
- --api_token "<REPLICATE-API-TOKEN>" \
100
- --temp 0.1
132
+ --model_name "deepseek-ai/deepseek-r1" \
133
+ --api_token "<REPLICATE-API-TOKEN>"
101
134
  ```
102
135
 
136
+ ## API
137
+
138
+ Please take a look at the [**related Wiki page**](https://github.com/nicolay-r/bulk-chain/wiki)
139
+
140
+
103
141
  # Embed your LLM
104
142
 
105
143
  All you have to do is to implement `BaseLM` class, that includes:
106
144
  * `__init__` -- for setting up *batching mode support* and (optional) *model name*;
107
145
  * `ask(prompt)` -- infer your model with the given `prompt`.
108
146
 
109
- See examples with models [here](/ext).
147
+ See examples with models [at nlp-thirdgate 🌌](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm).
@@ -0,0 +1,20 @@
1
+ bulk_chain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ bulk_chain/api.py,sha256=3q1t4A5wop_BRgYanFCCSQBiGu38P9ds0hTbuxNIUKQ,3590
3
+ bulk_chain/demo.py,sha256=3mvgEu03EyDDFzXtpx2fxozLITOn9Lo7ati6H1y54S4,3191
4
+ bulk_chain/infer.py,sha256=gq6G48XpOK56g5I_AU2kiQirQgcrZ353kfwjjRfQhSo,8069
5
+ bulk_chain/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
+ bulk_chain/core/llm_base.py,sha256=fuWxfEOSRYvnoZMOcfnq1E2LIJKnrpsnxQ1z6SmY1nM,1839
7
+ bulk_chain/core/service_args.py,sha256=lq4Veuh4QNu8mlCv8MT9S1rMxTn4FKalyp-3boYonVk,2136
8
+ bulk_chain/core/service_batch.py,sha256=yQr6fbQd4ifQBGMhZMrQQeZpXtDchMKMGJi8XPG7thc,1430
9
+ bulk_chain/core/service_data.py,sha256=ZjJDtd1jrQm9hRCXMqe4CT_qF2XDbWBE1lVibP7tAWo,942
10
+ bulk_chain/core/service_dict.py,sha256=lAghLU-3V3xYGv5BTA327Qcw8UJYmgQRMFdggzlrUgo,383
11
+ bulk_chain/core/service_json.py,sha256=6o1xM_8c9QEjH9Q3qEmJylU9nahfRXhUd5sFF2dGJwo,182
12
+ bulk_chain/core/service_llm.py,sha256=0lFqX02-BHI9OOdC-7hZhpsb9QrhCbKE7In3jhKXq3I,3452
13
+ bulk_chain/core/service_schema.py,sha256=KIP4n0Tz2h1i7SIMGhgAhoiCgUFXOT1rzMt38yACS2U,1154
14
+ bulk_chain/core/utils.py,sha256=UV6Cefaw7yZiYblsCr-s9LsbcI83xe7eESBvha9A2Og,2784
15
+ bulk_chain/core/utils_logger.py,sha256=BD-ADxaeeuHztaYjqtIY_cIzc5r2Svq9XwRtrgIEqyI,1636
16
+ bulk_chain-0.25.2.dist-info/LICENSE,sha256=VF9SjNpwwSSFEY_eP_8A1ocDCrbwfjI1pZexXdCkOwo,1076
17
+ bulk_chain-0.25.2.dist-info/METADATA,sha256=-N7-wOVXryBY1jkARSgWYZUAhdLYZlxkJ8qa8Vuj9no,6037
18
+ bulk_chain-0.25.2.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
19
+ bulk_chain-0.25.2.dist-info/top_level.txt,sha256=Hxq_wyH-GDXKBaA63UfBIiMJO2eCHJG5EOrXDphpeB4,11
20
+ bulk_chain-0.25.2.dist-info/RECORD,,
@@ -1,18 +0,0 @@
1
- bulk_chain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- bulk_chain/api.py,sha256=08i2tgFa_CCA0obC_Yr3rURI6MkuXYKgmuZaLcs4NLk,2807
3
- bulk_chain/infer.py,sha256=oWtBf2itZeM3fD-_QAzABKUMbsl4BqvHmW21TUTr880,9110
4
- bulk_chain/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- bulk_chain/core/llm_base.py,sha256=uX_uibm5y8STfMKNYL64EeF8UowfJGwCD_t-uftHoJE,1849
6
- bulk_chain/core/service_args.py,sha256=x-QHaKLD1d6qaJkD4lNwx7640ku9-6Uyr3mooB_6kLc,1981
7
- bulk_chain/core/service_batch.py,sha256=yQr6fbQd4ifQBGMhZMrQQeZpXtDchMKMGJi8XPG7thc,1430
8
- bulk_chain/core/service_data.py,sha256=ZjJDtd1jrQm9hRCXMqe4CT_qF2XDbWBE1lVibP7tAWo,942
9
- bulk_chain/core/service_dict.py,sha256=lAghLU-3V3xYGv5BTA327Qcw8UJYmgQRMFdggzlrUgo,383
10
- bulk_chain/core/service_json.py,sha256=6o1xM_8c9QEjH9Q3qEmJylU9nahfRXhUd5sFF2dGJwo,182
11
- bulk_chain/core/service_llm.py,sha256=1xbFW5OQY2ckKwIDZjsgNtnxKDp2wDjKKwyNS_yMU2s,2776
12
- bulk_chain/core/service_schema.py,sha256=KIP4n0Tz2h1i7SIMGhgAhoiCgUFXOT1rzMt38yACS2U,1154
13
- bulk_chain/core/utils.py,sha256=UV6Cefaw7yZiYblsCr-s9LsbcI83xe7eESBvha9A2Og,2784
14
- bulk_chain-0.25.0.dist-info/LICENSE,sha256=VF9SjNpwwSSFEY_eP_8A1ocDCrbwfjI1pZexXdCkOwo,1076
15
- bulk_chain-0.25.0.dist-info/METADATA,sha256=-Ky6ZekXHUCBByhSTgDYgMpC64ew8lGmQ7-I9dKsv6U,3874
16
- bulk_chain-0.25.0.dist-info/WHEEL,sha256=pL8R0wFFS65tNSRnaOVrsw9EOkOqxLrlUPenUYnJKNo,91
17
- bulk_chain-0.25.0.dist-info/top_level.txt,sha256=Hxq_wyH-GDXKBaA63UfBIiMJO2eCHJG5EOrXDphpeB4,11
18
- bulk_chain-0.25.0.dist-info/RECORD,,