bulk-chain 0.24.0__tar.gz → 0.24.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/PKG-INFO +9 -3
  2. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/README.md +7 -2
  3. bulk_chain-0.24.2/bulk_chain/core/llm_base.py +35 -0
  4. bulk_chain-0.24.2/bulk_chain/core/service_json.py +10 -0
  5. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain/core/service_llm.py +7 -7
  6. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain/infer.py +26 -20
  7. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain.egg-info/PKG-INFO +9 -3
  8. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain.egg-info/SOURCES.txt +0 -2
  9. bulk_chain-0.24.2/bulk_chain.egg-info/requires.txt +2 -0
  10. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/setup.py +1 -1
  11. bulk_chain-0.24.0/bulk_chain/core/llm_base.py +0 -13
  12. bulk_chain-0.24.0/bulk_chain/core/provider_sqlite.py +0 -79
  13. bulk_chain-0.24.0/bulk_chain/core/service_csv.py +0 -57
  14. bulk_chain-0.24.0/bulk_chain/core/service_json.py +0 -26
  15. bulk_chain-0.24.0/bulk_chain.egg-info/requires.txt +0 -1
  16. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/LICENSE +0 -0
  17. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain/__init__.py +0 -0
  18. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain/core/__init__.py +0 -0
  19. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain/core/service_args.py +0 -0
  20. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain/core/service_data.py +0 -0
  21. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain/core/service_schema.py +0 -0
  22. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain/core/utils.py +0 -0
  23. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain.egg-info/dependency_links.txt +0 -0
  24. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/bulk_chain.egg-info/top_level.txt +0 -0
  25. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/setup.cfg +0 -0
  26. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/test/test_args_seeking.py +0 -0
  27. {bulk_chain-0.24.0 → bulk_chain-0.24.2}/test/test_cmdargs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bulk_chain
3
- Version: 0.24.0
3
+ Version: 0.24.2
4
4
  Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
5
5
  Home-page: https://github.com/nicolay-r/bulk-chain
6
6
  Author: Nicolay Rusnachenko
@@ -16,10 +16,16 @@ Requires-Python: >=3.6
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: tqdm
19
+ Requires-Dist: source-iter==0.24.2
19
20
 
20
- # bulk-chain
21
+ # bulk-chain 0.24.2
21
22
  ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
22
23
  [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
24
+ [![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
25
+
26
+ <p align="center">
27
+ <img src="logo.png"/>
28
+ </p>
23
29
 
24
30
  A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
25
31
  It allows applying a series of prompts formed into a `schema` (See [related section](#chain-of-thought-schema))
@@ -33,7 +39,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
33
39
  # Installation
34
40
 
35
41
  ```bash
36
- pip install git+https://github.com/nicolay-r/bulk-chain@master
42
+ pip install bulk-chain
37
43
  ```
38
44
 
39
45
  ## Chain-of-Thought Schema
@@ -1,6 +1,11 @@
1
- # bulk-chain
1
+ # bulk-chain 0.24.2
2
2
  ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
3
3
  [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
4
+ [![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
5
+
6
+ <p align="center">
7
+ <img src="logo.png"/>
8
+ </p>
4
9
 
5
10
  A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
6
11
  It allows applying a series of prompts formed into a `schema` (See [related section](#chain-of-thought-schema))
@@ -14,7 +19,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
14
19
  # Installation
15
20
 
16
21
  ```bash
17
- pip install git+https://github.com/nicolay-r/bulk-chain@master
22
+ pip install bulk-chain
18
23
  ```
19
24
 
20
25
  ## Chain-of-Thought Schema
@@ -0,0 +1,35 @@
1
+ import logging
2
+ import time
3
+
4
+ from bulk_chain.core.utils import format_model_name
5
+
6
+
7
class BaseLM(object):
    """ Base class for language-model adapters.

        Subclasses implement `ask`; `ask_safe` wraps it with a simple
        retry loop.
    """

    def __init__(self, name, attempts=None, delay_sec=1, enable_log=True, **kwargs):
        """ name: model name; `name()` passes it to `format_model_name`.
            attempts: how many times `ask_safe` calls `ask` before giving up
                (None means a single attempt).
            delay_sec: pause in seconds between two consecutive attempts.
            enable_log: when True, failed attempts are reported via `logging`.
            kwargs: accepted and ignored by this base class.
        """
        self.__name = name
        self.__attempts = 1 if attempts is None else attempts
        self.__delay_sec = delay_sec

        # The attribute must always exist: `ask_safe` inspects it on failure,
        # including the case when logging is disabled.
        self.__logger = None
        if enable_log:
            self.__logger = logging.getLogger(__name__)
            logging.basicConfig(level=logging.INFO)

    def ask_safe(self, prompt):
        """ Call `ask(prompt)` up to `attempts` times and return the first
            successful response.

            Raises `Exception("Can't infer")` once every attempt has failed;
            the last underlying error is attached as `__cause__`.
        """
        last_error = None

        for i in range(self.__attempts):
            try:
                return self.ask(prompt)
            except Exception as e:
                # `Exception` rather than a bare `except`, so that
                # KeyboardInterrupt / SystemExit are not swallowed by retries.
                last_error = e
                if self.__logger is not None:
                    self.__logger.info(
                        "Unable to infer the result. Try {} out of {}.".format(i + 1, self.__attempts))
                # No point in waiting when there is no further attempt.
                if i + 1 < self.__attempts:
                    time.sleep(self.__delay_sec)

        raise Exception("Can't infer") from last_error

    def ask(self, prompt):
        """ Return the model response for `prompt`; must be overridden. """
        # `NotImplemented` is a comparison sentinel and is not callable;
        # the exception type is `NotImplementedError`.
        raise NotImplementedError()

    def name(self):
        """ Return the model name processed by `format_model_name`. """
        return format_model_name(self.__name)
@@ -0,0 +1,10 @@
1
+ import json
2
+
3
+
4
class JsonService(object):
    """ Helper for reading JSON documents from disk. """

    @staticmethod
    def read(src):
        """ Parse the JSON file located at path `src` and return the
            decoded object.
        """
        assert (isinstance(src, str))
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(src, "r", encoding="utf-8") as f:
            return json.load(f)
@@ -4,9 +4,6 @@ from bulk_chain.core.llm_base import BaseLM
4
4
  from bulk_chain.core.service_data import DataService
5
5
  from bulk_chain.core.utils import iter_params
6
6
 
7
- logger = logging.getLogger(__name__)
8
- logging.basicConfig(level=logging.INFO)
9
-
10
7
 
11
8
  def pad_str(text, pad):
12
9
  return text.rjust(len(text) + pad, ' ')
@@ -27,9 +24,12 @@ def nice_output(text, width, pad=4, remove_new_line=False):
27
24
 
28
25
 
29
26
  def chat_with_lm(lm, chain=None, model_name=None):
30
- assert(isinstance(lm, BaseLM))
31
- assert(isinstance(chain, list))
32
- assert(isinstance(model_name, str) or model_name is None)
27
+ assert (isinstance(lm, BaseLM))
28
+ assert (isinstance(chain, list))
29
+ assert (isinstance(model_name, str) or model_name is None)
30
+
31
+ logger = logging.getLogger(__name__)
32
+ logging.basicConfig(level=logging.INFO)
33
33
 
34
34
  do_exit = False
35
35
  model_name = model_name if model_name is not None else "agent"
@@ -74,7 +74,7 @@ def chat_with_lm(lm, chain=None, model_name=None):
74
74
  logger.info(nice_output(actual_prompt, pad=pad*2, remove_new_line=True, width=80))
75
75
 
76
76
  # Response.
77
- response = lm.ask(actual_prompt)
77
+ response = lm.ask_safe(actual_prompt)
78
78
  logger.info(pad_str(f"{model_name} (resp)->", pad=pad))
79
79
  logger.info(nice_output(response, pad=pad*2, remove_new_line=False, width=80))
80
80
 
@@ -1,16 +1,18 @@
1
+ import os
2
+ from os.path import join, basename
3
+
1
4
  import argparse
2
5
  import logging
3
- import os
4
6
  import sys
5
7
 
6
8
  from tqdm import tqdm
7
9
 
8
- from os.path import join, basename
10
+ from source_iter.service_csv import CsvService
11
+ from source_iter.service_jsonl import JsonlService
12
+ from source_iter.service_sqlite import SQLite3Service
9
13
 
10
14
  from bulk_chain.core.llm_base import BaseLM
11
- from bulk_chain.core.provider_sqlite import SQLiteProvider
12
15
  from bulk_chain.core.service_args import CmdArgsService
13
- from bulk_chain.core.service_csv import CsvService
14
16
  from bulk_chain.core.service_data import DataService
15
17
  from bulk_chain.core.service_json import JsonService
16
18
  from bulk_chain.core.service_llm import chat_with_lm
@@ -45,10 +47,10 @@ def init_llm(**model_kwargs):
45
47
 
46
48
 
47
49
  def init_schema(json_filepath):
48
- return SchemaService(json_data=JsonService.read_data(json_filepath))
50
+ return SchemaService(json_data=JsonService.read(json_filepath))
49
51
 
50
52
 
51
- def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
53
+ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table, id_column_name):
52
54
  """ This method represent Python API aimed at application of `llm` towards
53
55
  iterator of input_dicts via cache_target that refers to the SQLite using
54
56
  the given `schema`
@@ -59,7 +61,7 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
59
61
  assert (isinstance(cache_table, str))
60
62
 
61
63
  infer_modes = {
62
- "default": lambda prompt: llm.ask(prompt[:args.limit_prompt] if args.limit_prompt is not None else prompt)
64
+ "default": lambda prompt: llm.ask_safe(prompt[:args.limit_prompt] if args.limit_prompt is not None else prompt)
63
65
  }
64
66
 
65
67
  def optional_update_data_records(c, data):
@@ -75,11 +77,11 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
75
77
  return data[c]
76
78
 
77
79
  cache_providers = {
78
- "sqlite": lambda filepath, table_name, data_it: SQLiteProvider.write_auto(
80
+ "sqlite": lambda filepath, table_name, data_it: SQLite3Service.write_missed(
79
81
  data_it=data_it, target=filepath,
80
82
  data2col_func=optional_update_data_records,
81
83
  table_name=handle_table_name(table_name if table_name is not None else "contents"),
82
- id_column_name="uid")
84
+ id_column_name=id_column_name)
83
85
  }
84
86
 
85
87
  # We optionally wrap into limiter.
@@ -90,18 +92,18 @@ def iter_content(input_dicts_iter, llm, schema, cache_target, cache_table):
90
92
  # Provide data caching.
91
93
  cache_providers["sqlite"](cache_target, table_name=tgt_meta, data_it=tqdm(queries_it, desc="Iter content"))
92
94
 
93
- return SQLiteProvider.read(cache_target, table=cache_table)
95
+ return SQLite3Service.read(cache_target, table=cache_table)
94
96
 
95
97
 
96
98
  if __name__ == '__main__':
97
99
 
98
100
  parser = argparse.ArgumentParser(description="Infer Instruct LLM inference based on CoT schema")
99
101
  parser.add_argument('--adapter', dest='adapter', type=str, default=None)
102
+ parser.add_argument('--attempts', dest='attempts', type=int, default=None)
103
+ parser.add_argument('--id-col', dest='id_col', type=str, default="uid")
100
104
  parser.add_argument('--src', dest='src', type=str, default=None)
101
105
  parser.add_argument('--schema', dest='schema', type=str, default=None,
102
106
  help="Path to the JSON file that describes schema")
103
- parser.add_argument('--csv-sep', dest='csv_sep', type=str, default='\t')
104
- parser.add_argument('--csv-escape-char', dest='csv_escape_char', type=str, default=None)
105
107
  parser.add_argument('--to', dest='to', type=str, default=None, choices=["csv", "sqlite"])
106
108
  parser.add_argument('--output', dest='output', type=str, default=None)
107
109
  parser.add_argument('--limit', dest='limit', type=int, default=None,
@@ -114,7 +116,8 @@ if __name__ == '__main__':
114
116
  args = parser.parse_args(args=native_args[1:])
115
117
 
116
118
  # Initialize Large Language Model.
117
- llm, llm_model_name = init_llm(**CmdArgsService.args_to_dict(model_args))
119
+ model_args_dict = CmdArgsService.args_to_dict(model_args) | {"attempts": args.attempts}
120
+ llm, llm_model_name = init_llm(**model_args_dict)
118
121
 
119
122
  # Setup schema.
120
123
  schema = init_schema(args.schema)
@@ -123,17 +126,19 @@ if __name__ == '__main__':
123
126
 
124
127
  input_providers = {
125
128
  None: lambda _: chat_with_lm(llm, chain=schema.chain, model_name=llm_model_name),
126
- "csv": lambda filepath: CsvService.read(target=filepath, row_id_key="uid", delimiter=args.csv_sep,
127
- as_dict=True, skip_header=True, escapechar=args.csv_escape_char),
128
- "jsonl": lambda filepath: JsonService.read_lines(src=filepath, row_id_key="uid")
129
+ "csv": lambda filepath: CsvService.read(src=filepath, row_id_key=args.id_col,
130
+ as_dict=True, skip_header=True,
131
+ delimiter=model_args_dict.get("delimiter", "\t"),
132
+ escapechar=model_args_dict.get("escapechar", None)),
133
+ "jsonl": lambda filepath: JsonlService.read(src=filepath, row_id_key=args.id_col)
129
134
  }
130
135
 
131
136
  output_providers = {
132
137
  "csv": lambda filepath, data_it, header:
133
- CsvService.write_handled(target=filepath, data_it=data_it, header=header, data2col_func=lambda v: list(v)),
138
+ CsvService.write(target=filepath, data_it=data_it, header=header, it_type=None),
134
139
  "jsonl": lambda filepath, data_it, header:
135
- JsonService.write_lines(target=filepath,
136
- data_it=map(lambda item: {key:item[i] for i, key in enumerate(header)}, data_it))
140
+ JsonlService.write(target=filepath,
141
+ data_it=map(lambda item: {key: item[i] for i, key in enumerate(header)}, data_it))
137
142
  }
138
143
 
139
144
  # Setup output.
@@ -156,6 +161,7 @@ if __name__ == '__main__':
156
161
  data_it = iter_content(input_dicts_iter=input_providers[src_ext](src_filepath),
157
162
  schema=schema,
158
163
  llm=llm,
164
+ id_column_name=args.id_col,
159
165
  cache_target=cache_target,
160
166
  cache_table=cache_table)
161
167
 
@@ -167,4 +173,4 @@ if __name__ == '__main__':
167
173
  # Perform output writing process.
168
174
  output_providers[tgt_ext](filepath=output_target,
169
175
  data_it=data_it,
170
- header=SQLiteProvider.get_columns(target=cache_target, table=cache_table))
176
+ header=SQLite3Service.read_columns(target=cache_target, table=cache_table))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: bulk_chain
3
- Version: 0.24.0
3
+ Version: 0.24.2
4
4
  Summary: A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, ensuring reliable results for bulk input requests.
5
5
  Home-page: https://github.com/nicolay-r/bulk-chain
6
6
  Author: Nicolay Rusnachenko
@@ -16,10 +16,16 @@ Requires-Python: >=3.6
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: tqdm
19
+ Requires-Dist: source-iter==0.24.2
19
20
 
20
- # bulk-chain
21
+ # bulk-chain 0.24.2
21
22
  ![](https://img.shields.io/badge/Python-3.9-brightgreen.svg)
22
23
  [![](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/nicolay-r/bulk-chain/blob/master/bulk_chain_tutorial.ipynb)
24
+ [![twitter](https://img.shields.io/twitter/url/https/shields.io.svg?style=social)](https://x.com/nicolayr_/status/1847969224636961033)
25
+
26
+ <p align="center">
27
+ <img src="logo.png"/>
28
+ </p>
23
29
 
24
30
  A lightweight, no-strings-attached **[Chain-of-Thought](https://arxiv.org/abs/2201.11903) framework** for your LLM, ensuring reliable results for bulk input requests stored in `CSV` / `JSONL` / `sqlite`.
25
31
  It allows applying a series of prompts formed into a `schema` (See [related section](#chain-of-thought-schema))
@@ -33,7 +39,7 @@ It allows applying series of prompts formed into `schema` (See [related section]
33
39
  # Installation
34
40
 
35
41
  ```bash
36
- pip install git+https://github.com/nicolay-r/bulk-chain@master
42
+ pip install bulk-chain
37
43
  ```
38
44
 
39
45
  ## Chain-of-Thought Schema
@@ -10,9 +10,7 @@ bulk_chain.egg-info/requires.txt
10
10
  bulk_chain.egg-info/top_level.txt
11
11
  bulk_chain/core/__init__.py
12
12
  bulk_chain/core/llm_base.py
13
- bulk_chain/core/provider_sqlite.py
14
13
  bulk_chain/core/service_args.py
15
- bulk_chain/core/service_csv.py
16
14
  bulk_chain/core/service_data.py
17
15
  bulk_chain/core/service_json.py
18
16
  bulk_chain/core/service_llm.py
@@ -0,0 +1,2 @@
1
+ tqdm
2
+ source-iter==0.24.2
@@ -15,7 +15,7 @@ def get_requirements(filenames):
15
15
 
16
16
  setup(
17
17
  name='bulk_chain',
18
- version='0.24.0',
18
+ version='0.24.2',
19
19
  python_requires=">=3.6",
20
20
  description='A lightweight, no-strings-attached Chain-of-Thought framework for your LLM, '
21
21
  'ensuring reliable results for bulk input requests.',
@@ -1,13 +0,0 @@
1
- from bulk_chain.core.utils import format_model_name
2
-
3
-
4
class BaseLM(object):
    """ Base class for language-model adapters; subclasses implement `ask`. """

    def __init__(self, name):
        """ name: model name; `name()` passes it to `format_model_name`. """
        self.__name = name

    def ask(self, prompt):
        """ Return the model response for `prompt`; must be overridden. """
        # `NotImplemented` is a comparison sentinel and is not callable;
        # the exception type is `NotImplementedError`.
        raise NotImplementedError()

    def name(self):
        """ Return the model name processed by `format_model_name`. """
        return format_model_name(self.__name)
@@ -1,79 +0,0 @@
1
- import sqlite3
2
-
3
-
4
class SQLiteProvider(object):
    """ Small SQLite-backed storage for dictionaries.

        NOTE: table and column names are interpolated into the SQL text
        (identifiers cannot be bound as parameters), so they must come from
        trusted code. Row values are always bound as parameters.
    """

    @staticmethod
    def __create_table(table_name, columns, id_column_name,
                       id_column_type, sqlite3_column_types, cur):
        """ Create `table_name` (if missing) with `columns`, the first of
            which is the ID column, and an index over the ID column.
        """
        # Provide the ID column.
        sqlite3_column_types = [id_column_type] + sqlite3_column_types

        # Compose the whole columns list.
        content = ", ".join([" ".join(item) for item in zip(columns, sqlite3_column_types)])
        cur.execute(f"CREATE TABLE IF NOT EXISTS {table_name}({content})")
        cur.execute(f"CREATE INDEX IF NOT EXISTS i_id ON {table_name}({id_column_name})")

    @staticmethod
    def write_auto(data_it, target, data2col_func, table_name, id_column_name="id",
                   id_column_type="INTEGER"):
        """ NOTE: data_it is an iterator of dictionaries.
            This implementation automatically creates the table using the keys
            of the first dictionary (ID column first, the rest typed as TEXT)
            and inserts every row whose ID is not stored yet.

            data2col_func: callable `(column_name, data) -> value` that yields
                the value to store for a column; it is only invoked for rows
                that are actually inserted.

            Raises Exception when a row has a column absent from the first row.
        """
        con = sqlite3.connect(target)
        try:
            # `with con` commits on success and rolls back on error; it does
            # not close the connection, hence the outer try/finally.
            with con:
                cur = con.cursor()

                columns = None
                for data in data_it:
                    assert (isinstance(data, dict))

                    # Extracting columns from data.
                    row_columns = list(data.keys())
                    assert (id_column_name in row_columns)

                    # Optionally create table.
                    if columns is None:
                        # Setup list of columns, placing ID column first.
                        columns = [id_column_name] + [c for c in row_columns if c != id_column_name]

                        SQLiteProvider.__create_table(
                            columns=columns, table_name=table_name, cur=cur,
                            id_column_name=id_column_name, id_column_type=id_column_type,
                            sqlite3_column_types=["TEXT"] * (len(columns) - 1))

                    # Check that each row satisfies criteria of the first row.
                    unexpected = [column for column in row_columns if column not in columns]
                    if unexpected:
                        raise Exception(f"Unexpected column(s) in row: {unexpected}; known columns: {columns}")

                    # Skip rows that are already stored. The ID is bound as a
                    # parameter so that quotes in it cannot break the query.
                    uid = data[id_column_name]
                    r = cur.execute(
                        f"SELECT EXISTS(SELECT 1 FROM {table_name} WHERE {id_column_name}=?);", (uid,))
                    ans = r.fetchone()[0]
                    if ans == 1:
                        continue

                    # One placeholder per column that is actually inserted.
                    params = ", ".join(["?"] * len(row_columns))
                    row_columns_str = ", ".join(row_columns)
                    cur.execute(f"INSERT INTO {table_name}({row_columns_str}) VALUES ({params})",
                                [data2col_func(c, data) for c in row_columns])
                    # Commit per row, so stored results survive an interruption.
                    con.commit()
        finally:
            con.close()

    @staticmethod
    def read(target, column_names=None, table="content"):
        """ Iterate over rows (tuples) of `table`; `column_names` optionally
            restricts the selected columns.
        """
        conn = sqlite3.connect(target)
        try:
            cursor = conn.cursor()
            cols = "*" if column_names is None else ",".join(column_names)
            cursor.execute(f"SELECT {cols} FROM {table}")
            for row in cursor:
                yield row
        finally:
            # Runs when the generator is exhausted or closed.
            conn.close()

    @staticmethod
    def get_columns(target, table="content"):
        """ Return the list of column names of `table`. """
        conn = sqlite3.connect(target)
        try:
            cursor = conn.cursor()
            cursor.execute(f"PRAGMA table_info({table})")
            # The second field of every `table_info` row is the column name.
            return [row[1] for row in cursor.fetchall()]
        finally:
            conn.close()
@@ -1,57 +0,0 @@
1
- import csv
2
- import logging
3
-
4
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


class CsvService:
    """ Reading and writing of delimiter-separated files via the `csv` module. """

    @staticmethod
    def write(target, lines_it):
        """ Write every item of `lines_it` as a tab-separated row into `target`. """
        logger.info(f"Saving: {target}")
        # The context manager guarantees the data is flushed and the handle is
        # released; `newline=""` lets the csv writer control line endings.
        with open(target, "w", newline="") as f:
            w = csv.writer(f, delimiter="\t", quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for content in lines_it:
                w.writerow(content)

    @staticmethod
    def write_handled(target, data_it, data2col_func, header):
        """ Write `header` followed by `data2col_func(data)` for every item of
            `data_it`; each produced row must be as long as `header`.
        """

        def __it():
            yield header
            for data in data_it:
                content = data2col_func(data)
                assert (len(content) == len(header))
                yield content

        CsvService.write(target, lines_it=__it())

    @staticmethod
    def read(target, skip_header=False, cols=None, as_dict=False, row_id_key=None, **csv_kwargs):
        """ Iterate over the rows of the file `target`.

            skip_header: treat the first row as the header instead of data.
            cols: optional list of header names to select (requires a header).
            as_dict: yield dictionaries keyed by column name instead of lists
                (requires a header).
            row_id_key: when given, the 0-based index of the data row is
                prepended to every row (stored under this key for dicts).
            csv_kwargs: forwarded to `csv.reader`.
        """
        assert (isinstance(row_id_key, str) or row_id_key is None)
        assert (isinstance(cols, list) or cols is None)

        # Header exactly as it appears in the file, i.e. aligned with raw rows.
        file_header = None
        with open(target, newline='\n') as f:
            for row_id, row in enumerate(csv.reader(f, **csv_kwargs)):
                if skip_header and row_id == 0:
                    file_header = row
                    continue

                # Determine the content we wish to return.
                if cols is None:
                    keys = file_header
                    content = row
                else:
                    if file_header is None:
                        raise ValueError("`cols` requires the header row (skip_header=True)")
                    # Map by the file header (without the ID key), so names
                    # line up with the values of the raw row.
                    row_d = dict(zip(file_header, row))
                    content = [row_d[col_name] for col_name in cols]
                    keys = cols

                # Optionally attach the index of the data row.
                if row_id_key is not None:
                    data_row_id = row_id - 1 if skip_header else row_id
                    content = [data_row_id] + content
                    keys = None if keys is None else [row_id_key] + keys

                if as_dict:
                    assert (keys is not None)
                    assert (len(content) == len(keys))
                    yield {k: v for k, v in zip(keys, content)}
                else:
                    yield content
@@ -1,26 +0,0 @@
1
- import json
2
-
3
-
4
class JsonService(object):
    """ Reading and writing of JSON / JSON-lines files.

        Files are always handled as UTF-8, so results do not depend on the
        platform's locale encoding.
    """

    @staticmethod
    def read_data(src):
        """ Parse the JSON file located at path `src` and return the
            decoded object.
        """
        assert (isinstance(src, str))
        with open(src, "r", encoding="utf-8") as f:
            return json.load(f)

    @staticmethod
    def read_lines(src, row_id_key=None):
        """ Iterate over the JSON objects stored one per line in `src`.

            row_id_key: when given, the 0-based index of the line in the file
                is stored in every yielded object under this key.
            Blank lines are skipped (they still count towards the line index).
        """
        assert (isinstance(src, str))
        with open(src, "r", encoding="utf-8") as f:
            # Stream the file instead of loading all lines into memory.
            for line_ind, line in enumerate(f):
                if not line.strip():
                    continue
                content = json.loads(line)
                if row_id_key is not None:
                    content[row_id_key] = line_ind
                yield content

    @staticmethod
    def write_lines(target, data_it):
        """ Write every item of `data_it` as one JSON document per line. """
        # `ensure_ascii=False` emits raw non-ASCII characters, therefore the
        # file encoding has to be fixed explicitly.
        with open(target, "w", encoding="utf-8") as f:
            for item in data_it:
                f.write(f"{json.dumps(item, ensure_ascii=False)}\n")
@@ -1 +0,0 @@
1
- tqdm
File without changes
File without changes