bulk-chain 0.25.3__tar.gz → 1.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/PKG-INFO +29 -58
  2. bulk_chain-1.1.0/README.md +100 -0
  3. bulk_chain-1.1.0/bulk_chain/api.py +186 -0
  4. bulk_chain-1.1.0/bulk_chain/core/llm_base.py +24 -0
  5. bulk_chain-1.1.0/bulk_chain/core/service_asyncio.py +65 -0
  6. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain/core/service_batch.py +2 -21
  7. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain/core/utils.py +21 -24
  8. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain.egg-info/PKG-INFO +29 -58
  9. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain.egg-info/SOURCES.txt +4 -11
  10. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/setup.py +1 -2
  11. bulk_chain-1.1.0/test/test_api.py +67 -0
  12. bulk_chain-0.25.3/test/test_provider_batching.py → bulk_chain-1.1.0/test/test_api_batching.py +3 -5
  13. bulk_chain-1.1.0/test/test_api_streaming.py +21 -0
  14. bulk_chain-1.1.0/test/test_replicate_async_baseline.py +11 -0
  15. bulk_chain-1.1.0/test/test_replicate_async_batch_async.py +37 -0
  16. bulk_chain-0.25.3/README.md +0 -127
  17. bulk_chain-0.25.3/bulk_chain/api.py +0 -128
  18. bulk_chain-0.25.3/bulk_chain/core/llm_base.py +0 -52
  19. bulk_chain-0.25.3/bulk_chain/core/provider_sqlite.py +0 -127
  20. bulk_chain-0.25.3/bulk_chain/core/service_args.py +0 -72
  21. bulk_chain-0.25.3/bulk_chain/core/service_llm.py +0 -68
  22. bulk_chain-0.25.3/bulk_chain/core/utils_logger.py +0 -41
  23. bulk_chain-0.25.3/bulk_chain/demo.py +0 -84
  24. bulk_chain-0.25.3/bulk_chain/infer.py +0 -193
  25. bulk_chain-0.25.3/bulk_chain.egg-info/requires.txt +0 -2
  26. bulk_chain-0.25.3/test/test.py +0 -62
  27. bulk_chain-0.25.3/test/test_api.py +0 -46
  28. bulk_chain-0.25.3/test/test_api_streaming.py +0 -42
  29. bulk_chain-0.25.3/test/test_args_seeking.py +0 -26
  30. bulk_chain-0.25.3/test/test_cmdargs.py +0 -29
  31. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/LICENSE +0 -0
  32. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain/__init__.py +0 -0
  33. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain/core/__init__.py +0 -0
  34. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain/core/service_data.py +0 -0
  35. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain/core/service_dict.py +0 -0
  36. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain/core/service_json.py +0 -0
  37. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain/core/service_schema.py +0 -0
  38. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain.egg-info/dependency_links.txt +0 -0
  39. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/bulk_chain.egg-info/top_level.txt +0 -0
  40. {bulk_chain-0.25.3 → bulk_chain-1.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bulk_chain
3
- Version: 0.25.3
3
+ Version: 1.1.0
4
4
  Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
5
5
  Home-page: https://github.com/nicolay-r/bulk-chain
6
6
  Author: Nicolay Rusnachenko
@@ -15,10 +15,8 @@ Classifier: Topic :: Text Processing :: Linguistic
15
15
  Requires-Python: >=3.6
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
- Requires-Dist: tqdm
19
- Requires-Dist: source-iter==0.24.3
20
18
 
21
- # bulk-chain 0.25.3
19
+ # bulk-chain 1.1.0
22
20
  ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
23
21
  [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
24
22
  [![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
@@ -31,7 +29,7 @@ Requires-Dist: source-iter==0.24.3
31
29
  <p align="center">
32
30
  <a href="https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm"><b>Third-party providers hosting</b>↗️</a>
33
31
  <br>
34
- <a href="https://github.com/nicolay-r/bulk-chain/blob/master/README.md#demo-mode">👉<b>demo</b>👈</a>
32
+ <a href="https://github.com/nicolay-r/bulk-chain-shell">👉<b>demo</b>👈</a>
35
33
  </p>
36
34
 
37
35
  A no-strings-attached **framework** for your LLM that allows applying Chain-of-Thought-alike [prompt `schema`](#chain-of-thought-schema) towards a massive textual collections using custom **[third-party providers ↗️](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm)**.
@@ -39,11 +37,7 @@ A no-strings-attached **framework** for your LLM that allows applying Chain-of-
39
37
  ### Main Features
40
38
  * ✅ **No-strings**: you're free to LLM dependencies and flexible `venv` customization.
41
39
  * ✅ **Support schemas descriptions** for Chain-of-Thought concept.
42
- * ✅ **Provides iterator over infinite amount of input contexts** served in `CSV`/`JSONL`.
43
-
44
- ### Extra Features
45
- * ✅ **Progress caching [for remote LLMs]**: withstanding exception during LLM calls by using `sqlite3` engine for caching LLM answers;
46
-
40
+ * ✅ **Provides iterator over infinite amount of input contexts**
47
41
 
48
42
  # Installation
49
43
 
@@ -83,60 +77,37 @@ Below, is an example on how to declare your own schema:
83
77
 
84
78
  # Usage
85
79
 
86
- Preliminary steps:
87
-
88
- 1. Define your [schema](#chain-of-thought-schema) ([Example for Sentiment Analysis](/ext/schema/thor_cot_schema.json)))
89
- 2. Wrap or pick **LLM model** from the [<b>Third-party providers hosting</b>↗️](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm).
90
-
91
- ## Shell
92
-
93
- ### Demo Mode
94
-
95
- **demo mode** to interact with LLM via command line with LLM output streaming support.
96
- The video below illustrates an example of application for sentiment analysis on author opinion extraction towards mentioned object in text.
97
-
98
- Quck start with launching demo:
99
- 1. ⬇️ Download [replicate](https://replicate.com/) provider for `bulk-chain`:
100
- 2. 📜 Setup your reasoning `thor_cot_schema.json` according to the [following example ↗️](test/schema/thor_cot_schema.json)
101
- 3. 🚀 Launch `demo.py` as follows:
102
- ```bash
103
- python3 -m bulk_chain.demo \
104
- --schema "test/schema/thor_cot_schema.json" \
105
- --adapter "dynamic:replicate_104.py:Replicate" \
106
- %%m \
107
- --model_name "meta/meta-llama-3-70b-instruct" \
108
- --api_token "<REPLICATE-API-TOKEN>" \
109
- --stream
110
- ```
111
-
112
- 📺 This video showcase application of the [↗️ Sentiment Analysis Schema](https://github.com/nicolay-r/bulk-chain/blob/master/test/schema/thor_cot_schema.json) towards [LLaMA-3-70B-Instruct](https://replicate.com/meta/meta-llama-3-70b-instruct) hosted by Replicate for reasoning over submitted texts
113
- ![sa-bulk-chain-cot-final](https://github.com/user-attachments/assets/0cc8fdcb-6ddb-44a3-8f05-d76250ae6423)
80
+ ## 🤖 Prepare
114
81
 
82
+ 1. [schema](#chain-of-thought-schema)
83
+ * [Example for Sentiment Analysis](test/schema/thor_cot_schema.json)
84
+ 2. **LLM model** from the [<b>Third-party providers hosting</b>↗️](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm).
85
+ 3. Data (iter of dictionaries)
115
86
 
116
- ### Inference Mode
87
+ ## 🚀 Launch
117
88
 
118
- > **NOTE:** You have to install `source-iter` and `tqdm` packages that actual [dependencies](dependencies.txt) of this project
89
+ > **API**: For more details see the [**related Wiki page**](https://github.com/nicolay-r/bulk-chain/wiki)
119
90
 
120
- 1. ⬇️ Download [replicate](https://replicate.com/) provider for `bulk-chain`:
121
- ```bash
122
- wget https://raw.githubusercontent.com/nicolay-r/nlp-thirdgate/refs/heads/master/llm/replicate_104.py
123
- ```
124
- 2. 📜 Setup your reasoning `schema.json` according to the [following example ↗️](test/schema/default.json)
125
- 3. 🚀 Launch inference using `DeepSeek-R1`:
126
- ```bash
127
- python3 -m bulk_chain.infer \
128
- --src "<PATH-TO-YOUR-CSV-or-JSONL>" \
129
- --schema "test/schema/default.json" \
130
- --adapter "replicate_104.py:Replicate" \
131
- %%m \
132
- --model_name "deepseek-ai/deepseek-r1" \
133
- --api_token "<REPLICATE-API-TOKEN>"
91
+ ```python
92
+ from bulk_chain.core.utils import dynamic_init
93
+ from bulk_chain.api import iter_content
94
+
95
+ content_it = iter_content(
96
+ # 1. Your schema.
97
+ schema="YOUR_SCHEMA.json",
98
+ # 2. Your third-party model implementation.
99
+ llm=dynamic_init(class_filepath="replicate_104.py", class_name="Replicate")(api_token="<API-KEY>"),
100
+ # 3. Customize your inference and result providing modes:
101
+ infer_mode="batch_async",
102
+ return_mode="batch",
103
+ # 4. Your iterator of dictionaries
104
+ input_dicts_it=YOUR_DATA_IT,
105
+ )
106
+
107
+ for content in content_it:
108
+ # Handle your LLM responses here ...
134
109
  ```
135
110
 
136
- ## API
137
-
138
- Please take a look at the [**related Wiki page**](https://github.com/nicolay-r/bulk-chain/wiki)
139
-
140
111
 
141
112
  # Embed your LLM
142
113
 
@@ -0,0 +1,100 @@
1
+ # bulk-chain 1.1.0
2
+ ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
3
+ [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
4
+ [![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
5
+ [![PyPI downloads](https://img.shields.io/pypi/dm/bulk-chain.svg)](https://pypistats.org/packages/bulk-chain)
6
+
7
+ <p align="center">
8
+ <img src="logo.png"/>
9
+ </p>
10
+
11
+ <p align="center">
12
+ <a href="https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm"><b>Third-party providers hosting</b>↗️</a>
13
+ <br>
14
+ <a href="https://github.com/nicolay-r/bulk-chain-shell">👉<b>demo</b>👈</a>
15
+ </p>
16
+
17
+ A no-strings-attached **framework** for your LLM that allows applying Chain-of-Thought-alike [prompt `schema`](#chain-of-thought-schema) towards a massive textual collections using custom **[third-party providers ↗️](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm)**.
18
+
19
+ ### Main Features
20
+ * ✅ **No-strings**: you're free from LLM dependencies, with flexible `venv` customization.
21
+ * ✅ **Support schemas descriptions** for Chain-of-Thought concept.
22
+ * ✅ **Provides iterator over infinite amount of input contexts**
23
+
24
+ # Installation
25
+
26
+ From PyPI:
27
+
28
+ ```bash
29
+ pip install --no-deps bulk-chain
30
+ ```
31
+
32
+ or latest version from here:
33
+
34
+ ```bash
35
+ pip install git+https://github.com/nicolay-r/bulk-chain@master
36
+ ```
37
+
38
+ ## Chain-of-Thought Schema
39
+
40
+ To declare a Chain-of-Thought (CoT) schema, this project exploits `JSON` format.
41
+ This format adopts `name` field for declaring a name and `schema` is a list of CoT instructions for the Large Language Model.
42
+
43
+ Each step represents a dictionary with `prompt` and `out` keys that correspond to the input prompt and the output variable name, respectively.
44
+ All the variable names are expected to be mentioned in `{}`.
45
+
46
+ Below, is an example on how to declare your own schema:
47
+
48
+ ```python
49
+ {
50
+ "name": "schema-name",
51
+ "schema": [
52
+ {"prompt": "Given the question '{text}', let's think step-by-step.",
53
+ "out": "steps"},
54
+ {"prompt": "For the question '{text}' the reasoning steps are '{steps}'. what would be an answer?",
55
+ "out": "answer"},
56
+ ]
57
+ }
58
+ ```
59
+
60
+ # Usage
61
+
62
+ ## 🤖 Prepare
63
+
64
+ 1. [schema](#chain-of-thought-schema)
65
+ * [Example for Sentiment Analysis](test/schema/thor_cot_schema.json)
66
+ 2. **LLM model** from the [<b>Third-party providers hosting</b>↗️](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm).
67
+ 3. Data (iter of dictionaries)
68
+
69
+ ## 🚀 Launch
70
+
71
+ > **API**: For more details see the [**related Wiki page**](https://github.com/nicolay-r/bulk-chain/wiki)
72
+
73
+ ```python
74
+ from bulk_chain.core.utils import dynamic_init
75
+ from bulk_chain.api import iter_content
76
+
77
+ content_it = iter_content(
78
+ # 1. Your schema.
79
+ schema="YOUR_SCHEMA.json",
80
+ # 2. Your third-party model implementation.
81
+ llm=dynamic_init(class_filepath="replicate_104.py", class_name="Replicate")(api_token="<API-KEY>"),
82
+ # 3. Customize your inference and result providing modes:
83
+ infer_mode="batch_async",
84
+ return_mode="batch",
85
+ # 4. Your iterator of dictionaries
86
+ input_dicts_it=YOUR_DATA_IT,
87
+ )
88
+
89
+ for content in content_it:
90
+ # Handle your LLM responses here ...
91
+ ```
92
+
93
+
94
+ # Embed your LLM
95
+
96
+ All you have to do is to implement `BaseLM` class, that includes:
97
+ * `__init__` -- for setting up *batching mode support* and (optional) *model name*;
98
+ * `ask(prompt)` -- infer your model with the given `prompt`.
99
+
100
+ See examples with models [at nlp-thirdgate 🌌](https://github.com/nicolay-r/nlp-thirdgate?tab=readme-ov-file#llm).
@@ -0,0 +1,186 @@
1
+ import asyncio
2
+ import collections
3
+ import logging
4
+ import os
5
+ from itertools import chain
6
+
7
+ from bulk_chain.core.llm_base import BaseLM
8
+ from bulk_chain.core.service_asyncio import AsyncioService
9
+ from bulk_chain.core.service_batch import BatchIterator
10
+ from bulk_chain.core.service_data import DataService
11
+ from bulk_chain.core.service_dict import DictionaryService
12
+ from bulk_chain.core.service_json import JsonService
13
+ from bulk_chain.core.service_schema import SchemaService
14
+ from bulk_chain.core.utils import attempt_wrapper
15
+
16
+
17
+ INFER_MODES = {
18
+ "single": lambda llm, batch, **kwargs: [llm.ask(prompt) for prompt in batch],
19
+ "single_stream": lambda llm, batch, **kwargs: [llm.ask_stream(prompt) for prompt in batch],
20
+ "batch": lambda llm, batch, **kwargs: llm.ask(batch),
21
+ "batch_async": lambda llm, batch, **kwargs: AsyncioService.run_tasks(
22
+ batch=batch, async_handler=llm.ask_async, event_loop=kwargs.get("event_loop")
23
+ ),
24
+ "batch_stream_async": lambda llm, batch, **kwargs: AsyncioService.run_tasks(
25
+ batch=batch, async_handler=llm.ask_stream_async, event_loop=kwargs.get("event_loop")
26
+ ),
27
+ }
28
+
29
+
30
+ CWD = os.getcwd()
31
+
32
+
33
+ def _iter_batch_prompts(c, batch_content_it, **kwargs):
34
+ for ind_in_batch, entry in enumerate(batch_content_it):
35
+ content = DataService.get_prompt_text(
36
+ prompt=entry[c]["prompt"],
37
+ data_dict=entry,
38
+ handle_missed_func=kwargs["handle_missed_value_func"])
39
+ yield ind_in_batch, content
40
+
41
+
42
+ def __handle_agen_to_gen(handle, batch, event_loop):
43
+ """ This handler provides conversion of the async generator to generator (sync).
44
+ """
45
+
46
+ def __wrap_with_index(async_gens):
47
+ async def wrapper(index, agen):
48
+ async for item in agen:
49
+ yield index, item
50
+ return [wrapper(i, agen) for i, agen in enumerate(async_gens)]
51
+
52
+ agen_list = handle(batch, event_loop=event_loop)
53
+
54
+ it = AsyncioService.async_gen_to_iter(
55
+ gen=AsyncioService.merge_generators(*__wrap_with_index(agen_list)),
56
+ loop=event_loop)
57
+
58
+ for ind_in_batch, chunk in it:
59
+ yield ind_in_batch, str(chunk)
60
+
61
+
62
+ def __handle_gen(handle, batch, event_loop):
63
+ """ This handler deals with the iteration of each individual element of the batch.
64
+ """
65
+
66
+ def _iter_entry_content(entry):
67
+ if isinstance(entry, str):
68
+ yield entry
69
+ elif isinstance(entry, collections.abc.Iterable):
70
+ for chunk in map(lambda item: str(item), entry):
71
+ yield chunk
72
+ else:
73
+ raise Exception(f"Non supported type `{type(entry)}` for handling output from batch")
74
+
75
+ for ind_in_batch, entry in enumerate(handle(batch, event_loop=event_loop)):
76
+ for chunk in _iter_entry_content(entry=entry):
77
+ yield ind_in_batch, chunk
78
+
79
+
80
+ def _iter_chunks(p_column, batch_content_it, **kwargs):
81
+ handler = __handle_agen_to_gen if kwargs["infer_mode"] == "batch_stream_async" else __handle_gen
82
+ p_batch = [item[p_column] for item in batch_content_it]
83
+ it = handler(handle=kwargs["handle_batch_func"], batch=p_batch, event_loop=kwargs["event_loop"])
84
+ for ind_in_batch, chunk in it:
85
+ yield ind_in_batch, chunk
86
+
87
+
88
+ def _infer_batch(batch, batch_ind, schema, return_mode, cols=None, **kwargs):
89
+ assert (isinstance(batch, list))
90
+
91
+ if len(batch) == 0:
92
+ return batch
93
+
94
+ if cols is None:
95
+ first_item = batch[0]
96
+ cols = list(first_item.keys()) if cols is None else cols
97
+
98
+ for c in cols:
99
+
100
+ # Handling prompt column.
101
+ if c in schema.p2r:
102
+ content_it = _iter_batch_prompts(c=c, batch_content_it=iter(batch), **kwargs)
103
+ for ind_in_batch, prompt in content_it:
104
+ batch[ind_in_batch][c] = prompt
105
+
106
+ # Handling column for inference.
107
+ if c in schema.r2p:
108
+ content_it = _iter_chunks(p_column=schema.r2p[c], batch_content_it=iter(batch), **kwargs)
109
+ # Register values.
110
+ for item in batch:
111
+ item[c] = []
112
+ for ind_in_batch, chunk in content_it:
113
+ # Append batch.
114
+ batch[ind_in_batch][c].append(chunk)
115
+ # Returning (optional).
116
+ if return_mode == "chunk":
117
+ global_ind = batch_ind * len(batch) + ind_in_batch
118
+ yield [global_ind, c, chunk]
119
+
120
+ # Convert content to string.
121
+ for item in batch:
122
+ item[c] = "".join(item[c])
123
+
124
+ if return_mode == "record":
125
+ for record in batch:
126
+ yield record
127
+
128
+ if return_mode == "batch":
129
+ yield batch
130
+
131
+
132
+ def iter_content(input_dicts_it, llm, schema, batch_size=1, limit_prompt=None,
133
+ infer_mode="batch", return_mode="batch", attempts=1, event_loop=None,
134
+ **kwargs):
135
+ """ This method represent Python API aimed at application of `llm` towards
136
+ iterator of input_dicts via cache_target that refers to the SQLite using
137
+ the given `schema`
138
+ """
139
+ assert (infer_mode in INFER_MODES.keys())
140
+ assert (return_mode in ["batch", "chunk", "record"])
141
+ assert (isinstance(llm, BaseLM))
142
+
143
+ # Setup event loop.
144
+ event_loop = asyncio.get_event_loop_policy().get_event_loop() \
145
+ if event_loop is None else event_loop
146
+
147
+ # Quick initialization of the schema.
148
+ if isinstance(schema, str):
149
+ schema = JsonService.read(schema)
150
+ if isinstance(schema, dict):
151
+ schema = SchemaService(json_data=schema)
152
+
153
+ prompts_it = map(
154
+ lambda data: DictionaryService.custom_update(src_dict=dict(data), other_dict=schema.cot_args),
155
+ input_dicts_it
156
+ )
157
+
158
+ handle_batch_func = lambda batch, **handle_kwargs: INFER_MODES[infer_mode](
159
+ llm,
160
+ DataService.limit_prompts(batch, limit=limit_prompt),
161
+ **handle_kwargs
162
+ )
163
+
164
+ # Optional wrapping into attempts.
165
+ if attempts > 1:
166
+ # Optional setup of the logger.
167
+ logger = logging.getLogger(__name__)
168
+ logging.basicConfig(level=logging.INFO)
169
+
170
+ attempt_dec = attempt_wrapper(attempts=attempts,
171
+ delay_sec=kwargs.get("attempt_delay_sec", 1),
172
+ logger=logger)
173
+ handle_batch_func = attempt_dec(handle_batch_func)
174
+
175
+ content_it = (_infer_batch(batch=batch,
176
+ batch_ind=batch_ind,
177
+ infer_mode=infer_mode,
178
+ handle_batch_func=handle_batch_func,
179
+ handle_missed_value_func=lambda *_: None,
180
+ return_mode=return_mode,
181
+ schema=schema,
182
+ event_loop=event_loop,
183
+ **kwargs)
184
+ for batch_ind, batch in enumerate(BatchIterator(prompts_it, batch_size=batch_size)))
185
+
186
+ yield from chain.from_iterable(content_it)
@@ -0,0 +1,24 @@
1
+ class BaseLM(object):
2
+
3
+ def __init__(self, **kwargs):
4
+ pass
5
+
6
+ def ask(self, content):
7
+ """ Assumes to return str.
8
+ """
9
+ raise NotImplemented()
10
+
11
+ def ask_stream(self, content):
12
+ """ Assumes to return generator.
13
+ """
14
+ raise NotImplemented()
15
+
16
+ async def ask_async(self, prompt):
17
+ """ Assumes to return co-routine.
18
+ """
19
+ raise NotImplemented()
20
+
21
+ async def ask_stream_async(self, batch):
22
+ """ Assumes to return AsyncGenerator.
23
+ """
24
+ raise NotImplemented()
@@ -0,0 +1,65 @@
1
+ import asyncio
2
+ from typing import AsyncGenerator, Any
3
+
4
+
5
+ class AsyncioService:
6
+
7
+ @staticmethod
8
+ async def _run_tasks_async(batch, async_handler):
9
+ tasks = [async_handler(prompt) for prompt in batch]
10
+ return await asyncio.gather(*tasks)
11
+
12
+ @staticmethod
13
+ async def _run_generator(gen, output_queue, idx):
14
+ try:
15
+ async for item in gen:
16
+ await output_queue.put((idx, item))
17
+ finally:
18
+ await output_queue.put((idx, StopAsyncIteration))
19
+
20
+
21
+ @staticmethod
22
+ def run_tasks(event_loop, **tasks_kwargs):
23
+ return event_loop.run_until_complete(AsyncioService._run_tasks_async(**tasks_kwargs))
24
+
25
+ @staticmethod
26
+ async def merge_generators(*gens: AsyncGenerator[Any, None]) -> AsyncGenerator[Any, None]:
27
+
28
+ output_queue = asyncio.Queue()
29
+ tasks = [
30
+ asyncio.create_task(AsyncioService._run_generator(gen, output_queue, idx))
31
+ for idx, gen in enumerate(gens)
32
+ ]
33
+
34
+ finished = set()
35
+ while len(finished) < len(tasks):
36
+ idx, item = await output_queue.get()
37
+ if item is StopAsyncIteration:
38
+ finished.add(idx)
39
+ else:
40
+ yield item
41
+
42
+ for task in tasks:
43
+ task.cancel()
44
+
45
+ @staticmethod
46
+ def async_gen_to_iter(gen, loop=None):
47
+ """ This approach is limited. Could be considered as legacy.
48
+ https://stackoverflow.com/questions/71580727/translating-async-generator-into-sync-one/78573267#78573267
49
+ """
50
+
51
+ loop_created = False
52
+ if loop is None:
53
+ loop_created = True
54
+ loop = asyncio.new_event_loop()
55
+
56
+ asyncio.set_event_loop(loop)
57
+ try:
58
+ while True:
59
+ try:
60
+ yield loop.run_until_complete(gen.__anext__())
61
+ except StopAsyncIteration:
62
+ break
63
+ finally:
64
+ if loop_created:
65
+ loop.close()
@@ -1,27 +1,8 @@
1
- class BatchService(object):
2
-
3
- @staticmethod
4
- def handle_param_as_batch(batch, src_param, tgt_param, handle_batch_func, handle_entry_func):
5
- assert (isinstance(batch, list))
6
- assert (isinstance(src_param, str))
7
- assert (callable(handle_batch_func))
8
-
9
- _batch = [item[src_param] for item in batch]
10
-
11
- # Do handling for the batch.
12
- _handled_batch = handle_batch_func(_batch)
13
- assert (isinstance(_handled_batch, list))
14
-
15
- # Apply changes.
16
- for i, item in enumerate(batch):
17
- item[tgt_param] = handle_entry_func(entry=_handled_batch[i], info={"ind": i, "param": tgt_param})
18
-
19
-
20
1
  class BatchIterator:
21
2
 
22
3
  def __init__(self, data_iter, batch_size, end_value=None, filter_func=None):
23
- assert(isinstance(batch_size, int) and batch_size > 0)
24
- assert(callable(end_value) or end_value is None)
4
+ assert (isinstance(batch_size, int) and batch_size > 0)
5
+ assert (callable(end_value) or end_value is None)
25
6
  self.__data_iter = data_iter
26
7
  self.__index = 0
27
8
  self.__batch_size = batch_size
@@ -1,6 +1,7 @@
1
1
  import importlib
2
2
  import logging
3
3
  import sys
4
+ import time
4
5
  from collections import Counter
5
6
  from os.path import dirname, join, basename
6
7
 
@@ -48,28 +49,6 @@ def iter_params(text):
48
49
  beg = pe+1
49
50
 
50
51
 
51
- def format_model_name(name):
52
- return name.replace("/", "_")
53
-
54
-
55
- def parse_filepath(filepath, default_filepath=None, default_ext=None):
56
- """ This is an auxiliary function for handling sources and targets from cmd string.
57
- """
58
- if filepath is None:
59
- return default_filepath, default_ext, None
60
- info = filepath.split(":")
61
- filepath = info[0]
62
- meta = info[1] if len(info) > 1 else None
63
- ext = filepath.split('.')[-1] if default_ext is None else default_ext
64
- return filepath, ext, meta
65
-
66
-
67
- def handle_table_name(name):
68
- return name.\
69
- replace('-', '_').\
70
- replace('.', "_")
71
-
72
-
73
52
  def auto_import(name, is_class=False):
74
53
  """ Import from the external python packages.
75
54
  """
@@ -82,10 +61,10 @@ def auto_import(name, is_class=False):
82
61
  return m() if is_class else m
83
62
 
84
63
 
85
- def dynamic_init(class_dir, class_filepath, class_name=None):
64
+ def dynamic_init(class_filepath, class_name=None):
86
65
 
87
66
  # Registering path.
88
- target = join(class_dir, dirname(class_filepath))
67
+ target = join(dirname(class_filepath))
89
68
  logger.info(f"Adding sys path for `{target}`")
90
69
  sys.path.insert(1, target)
91
70
  class_path_list = class_filepath.split('/')
@@ -111,3 +90,21 @@ def optional_limit_iter(it_data, limit=None):
111
90
  if limit is not None and counter["returned"] > limit:
112
91
  break
113
92
  yield data
93
+
94
+
95
+ def attempt_wrapper(attempts, delay_sec=1, logger=None):
96
+ def decorator(func):
97
+ def wrapper(*args, **kwargs):
98
+ for i in range(attempts):
99
+ try:
100
+ # Do action.
101
+ return func(*args, **kwargs)
102
+ except Exception as e:
103
+ if logger is not None:
104
+ logger.info(f"Unable to infer the result. Try {i} out of {attempts}.")
105
+ logger.info(e)
106
+ if delay_sec is not None:
107
+ time.sleep(delay_sec)
108
+ raise Exception(f"Failed after {attempts} attempts")
109
+ return wrapper
110
+ return decorator