hestia-earth-utils 0.16.5__py3-none-any.whl → 0.16.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ from functools import reduce
 from statistics import mode, mean
 from hestia_earth.schema import TermTermType
 
-from .lookup import download_lookup, get_table_value, column_name
+from .lookup import download_lookup, get_table_value
 from .tools import non_empty_list, non_empty_value, flatten
 from .emission import cycle_emissions_in_system_boundary
 from .model import filter_list_term_type
@@ -15,7 +15,7 @@ def get_lookup_value(blank_node: dict, column: str):
     term = blank_node.get('term', {})
     table_name = f"{term.get('termType')}.csv" if term else None
     value = get_table_value(
-        download_lookup(table_name), 'termid', term.get('@id'), column_name(column)
+        download_lookup(table_name), 'term.id', term.get('@id'), column
     ) if table_name else None
     return value
 
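These two hunks are the caller-facing half of the lookup migration: rows are now matched on the raw `term.id` CSV header rather than the sanitised `termid` key, and column names are passed through unchanged instead of going through `column_name`. A minimal sketch of the difference at a `get_table_value` call site, using an invented term id and column name:

from hestia_earth.utils.lookup import download_lookup, get_table_value

lookup = download_lookup('crop.csv')  # any glossary lookup; 'crop.csv' is just an example

# 0.16.5 (numpy recarray): rows matched on 'termid', header sanitised first
# value = get_table_value(lookup, 'termid', 'wheatGrain', column_name('cropGroupingFAO'))

# 0.16.7 (pandas DataFrame): rows matched on 'term.id', raw header used as-is
value = get_table_value(lookup, 'term.id', 'wheatGrain', 'cropGroupingFAO')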
@@ -1,15 +1,14 @@
 from functools import reduce
-from typing import Union
+from typing import Any
 import requests
-import numpy
+from io import StringIO
+import pandas as pd
 
 from .storage import _load_from_storage
 from .request import request_url, web_url
-from .csv_utils import csv_str_to_recarray, csv_file_to_recarray, is_missing_value, _replace_chars
 
 _GLOSSARY_FOLDER = 'glossary/lookups'
 _memory = {}
-_INDEX_COL = 'termid'
 
 
 def _memory_wrapper(key: str, func):
@@ -18,6 +17,18 @@ def _memory_wrapper(key: str, func):
     return _memory[key]
 
 
+def _read_csv(value: str) -> pd.DataFrame:
+    return pd.read_csv(value, na_values=['-', ''])
+
+
+def _read_csv_from_string(data: str) -> pd.DataFrame:
+    return _read_csv(StringIO(data))
+
+
+def is_missing_value(value):
+    return pd.isna(value) or value is None or value == '' or value == '-'
+
+
 def load_lookup(filepath: str, keep_in_memory: bool = False):
     """
     Import local lookup table as csv file into a `numpy.recarray`.
@@ -34,7 +45,7 @@ def load_lookup(filepath: str, keep_in_memory: bool = False):
     numpy.recarray
         The `numpy.recarray` converted from the csv content.
     """
-    def load(): return csv_file_to_recarray(filepath)
+    def load(): return _read_csv(filepath)
     return _memory_wrapper(filepath, load) if keep_in_memory else load()
 
 
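The CSV parsing itself moves from the hand-rolled recarray helpers in csv_utils (deleted at the bottom of this diff) to pandas. Of note: the '-' placeholder used throughout the HESTIA lookup files is now read straight into NaN instead of being swapped for a -99999 sentinel. A self-contained sketch of that behaviour, with invented CSV content:

from io import StringIO
import pandas as pd

csv_content = "term.id,value\nterm1,12.5\nterm2,-\n"  # invented, for illustration only
df = pd.read_csv(StringIO(csv_content), na_values=['-', ''])

print(df['value'].iloc[1])           # nan: '-' is parsed as a missing value
print(pd.isna(df['value'].iloc[1]))  # True, which is what the new is_missing_value reports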
@@ -43,7 +54,8 @@ def _download_lookup_data(filename: str):
 
     def fallback():
         url = request_url(f"{web_url()}/{filepath}")
-        return requests.get(url).content.decode('utf-8')
+        data = requests.get(url).content.decode('utf-8')
+        return data if data and '<html' not in data else None
 
     try:
         data = _load_from_storage(filepath, glossary=True)
@@ -52,20 +64,7 @@ def _download_lookup_data(filename: str):
         return fallback()
 
 
-def _build_index(array: numpy.recarray):
-    columns = list(array.dtype.names)
-    try:
-        return {
-            row[_INDEX_COL]: {col: row[col] for col in columns}
-            for row in array
-        } if _INDEX_COL in columns else array
-    except TypeError:
-        return {
-            array[_INDEX_COL].item(): {col: array[col].item() for col in columns}
-        } if _INDEX_COL in columns else array
-
-
-def download_lookup(filename: str, keep_in_memory: bool = True, build_index: bool = False):
+def download_lookup(filename: str, keep_in_memory: bool = True):
     """
     Download lookup table from HESTIA as csv into a `numpy.recarray`.
 
@@ -85,8 +84,7 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
     """
     def load():
         data = _download_lookup_data(filename)
-        rec = csv_str_to_recarray(data) if data else None
-        return (_build_index(rec) if build_index else rec) if data else None
+        return _read_csv_from_string(data) if data else None
 
     try:
         return _memory_wrapper(filename, load) if keep_in_memory else load()
@@ -96,19 +94,9 @@ def download_lookup(filename: str, keep_in_memory: bool = True, build_index: boo
 
 def column_name(key: str):
     """
-    Convert the column name to a usable key on a `numpy.recarray`.
-
-    Parameters
-    ----------
-    key : str
-        The column name.
-
-    Returns
-    -------
-    str
-        The column name that can be used in `get_table_value`.
+    Deprecated. Columns are no longer renamed.
     """
-    return _replace_chars(key) if key else ''
+    return key
 
 
 def _parse_value(value: str):
@@ -123,36 +111,24 @@ def _parse_value(value: str):
     return value
 
 
-def _get_single_table_value(data: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val):
-    return (
-        data.get(col_match_with, {})[col_val] if isinstance(data, dict) else
-        data[data[col_match] == col_match_with][col_val][0]
-    )
-
-
-def _get_multiple_table_values(data: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val):
-    def reducer(x, values):
-        col = values[1]
-        value = col_match_with[values[0]]
-        return x.get(value) if isinstance(x, dict) else x[x[col] == value]
+def _get_single_table_value(df: pd.DataFrame, col_match: str, col_match_with, col_val):
+    filtered_df = df[df[col_match] == col_match_with]
+    return None if filtered_df.empty else filtered_df[col_val].iloc[0]
 
-    return reduce(reducer, enumerate(col_match), data)[col_val][0]
 
-
-def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_match_with, col_val, default_value=''):
+def get_table_value(lookup: pd.DataFrame, col_match: str, col_match_with: str, col_val: Any, default_value=''):
     """
     Get a value matched by one or more columns from a `numpy.recarray`.
 
     Parameters
    ----------
-    lookup : dict | numpy.recarray
+    lookup : DataFrame
         The value returned by the `download_lookup` function.
     col_match : str
         Which `column` should be used to find data in. This will restrict the rows to search for.
         Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match_with`.
-    col_match_with
+    col_match_with: str
         Which column `value` should be used to find data in. This will restrict the rows to search for.
-        Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match`.
     col_val: str
         The column which contains the value to look for.
     default_value : Any
@@ -163,44 +139,35 @@ def get_table_value(lookup: Union[dict, numpy.recarray], col_match: str, col_mat
     str
         The value found or `None` if no match.
     """
-    single = isinstance(col_match, str) and isinstance(col_match_with, str)
     try:
-        value = (
-            _get_single_table_value(lookup, col_match, col_match_with, col_val) if single else
-            _get_multiple_table_values(lookup, col_match, col_match_with, col_val)
-        )
+        value = _get_single_table_value(lookup, col_match, col_match_with, col_val)
+        print(value, type(value))
        return default_value if is_missing_value(value) else _parse_value(value)
     except Exception:
         return None
 
 
-def find_term_ids_by(lookup: Union[dict, numpy.recarray], col_match: str, col_match_with):
+def find_term_ids_by(lookup: pd.DataFrame, col_match: str, col_match_with: str):
     """
     Find `term.id` values where a column matches a specific value.
 
     Parameters
     ----------
-    lookup : dict | numpy.recarray
+    lookup : DataFrame
         The value returned by the `download_lookup` function.
     col_match : str
         Which `column` should be used to find data in. This will restrict the rows to search for.
         Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match_with`.
-    col_match_with
+    col_match_with: str
         Which column `value` should be used to find data in. This will restrict the rows to search for.
-        Can be a single `str` or a list of `str`. If a list is used, must be the same length as `col_match`.
 
     Returns
     -------
     list[str]
         The list of `term.id` that matched the expected column value.
     """
-    term_ids = (
-        set([
-            key
-            for key, value in lookup.items()
-            if value.get(col_match) == col_match_with
-        ])
-    ) if isinstance(lookup, dict) else set(list(lookup[lookup[col_match] == col_match_with].termid))
+    filtered_df = lookup[lookup[col_match] == col_match_with]
+    term_ids = filtered_df['term.id'].unique().tolist() if 'term.id' in filtered_df.columns else []
     return list(map(str, term_ids))
 
 
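With the recarray and dict code paths gone, `get_table_value` and `find_term_ids_by` reduce to plain DataFrame filtering, and only a single col_match/col_match_with pair is handled (the list-of-columns path through `_get_multiple_table_values` no longer exists). A small sketch of the filtering pattern they now rely on, on an invented frame:

import pandas as pd

df = pd.DataFrame({
    'term.id': ['term1', 'term2', 'term3'],  # invented lookup rows
    'group': ['a', 'b', 'a'],
})

# _get_single_table_value: first matching row, or None when nothing matches
matches = df[df['group'] == 'a']
first_value = None if matches.empty else matches['term.id'].iloc[0]

# find_term_ids_by: every distinct 'term.id' among the matching rows
term_ids = matches['term.id'].unique().tolist() if 'term.id' in matches.columns else []

print(first_value, term_ids)  # term1 ['term1', 'term3']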
@@ -265,13 +232,13 @@ def extract_grouped_data_closest_date(data: str, year: int) -> str:
     return None if closest_year is None else _parse_value(data_by_date.get(closest_year))
 
 
-def lookup_term_ids(lookup: Union[dict, numpy.recarray]):
+def lookup_term_ids(lookup: pd.DataFrame):
     """
     Get the `term.id` values from a lookup.
 
     Parameters
     ----------
-    lookup : dict | numpy.recarray
+    lookup : DataFrame
         The value returned by the `download_lookup` function.
 
     Returns
@@ -279,16 +246,16 @@ def lookup_term_ids(lookup: Union[dict, numpy.recarray]):
     list[str]
         The `term.id` values from the lookup.
     """
-    return lookup.keys() if isinstance(lookup, dict) else list(lookup.termid)
+    return list(map(str, lookup['term.id'].tolist())) if 'term.id' in lookup.columns else []
 
 
-def lookup_columns(lookup: Union[dict, numpy.recarray]):
+def lookup_columns(lookup: pd.DataFrame):
     """
     Get the columns from a lookup.
 
     Parameters
     ----------
-    lookup : dict | numpy.recarray
+    lookup : DataFrame
         The value returned by the `download_lookup` function.
 
     Returns
@@ -296,4 +263,4 @@ def lookup_columns(lookup: Union[dict, numpy.recarray]):
     list[str]
         The columns from the lookup.
     """
-    return list(list(lookup.values())[0].keys()) if isinstance(lookup, dict) else list(lookup.dtype.names)
+    return list(lookup.columns)
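The two accessors above become thin wrappers over the DataFrame. A quick sketch of what they now return, on a hand-built frame rather than a downloaded lookup:

import pandas as pd

lookup = pd.DataFrame({'term.id': ['term1', 'term2'], 'someColumn': [1, 2]})  # invented

print(list(lookup.columns))                        # lookup_columns: ['term.id', 'someColumn']
print(list(map(str, lookup['term.id'].tolist())))  # lookup_term_ids: ['term1', 'term2']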
@@ -2,7 +2,7 @@ from functools import lru_cache
 import json
 from hestia_earth.schema import SchemaType
 
-from .lookup import _download_lookup_data, download_lookup, get_table_value, column_name
+from .lookup import _download_lookup_data, download_lookup, get_table_value
 from .api import download_hestia
 from .tools import non_empty_list, flatten
 
@@ -45,7 +45,7 @@ def _allowed_model_mapping(model: str, term_id: str, column: str):
     mapping = _allowed_mapping_data()
     value = mapping.get(term_id, {}).get(model, {}).get(column) if mapping else get_table_value(
         download_lookup(f"{(download_hestia(term_id) or {}).get('termType')}-model-{column}.csv"),
-        'termid', term_id, column_name(column)
+        'term.id', term_id, column
     )
     return (value or _ALLOW_ALL).split(';') if isinstance(value, str) else _ALLOW_ALL
 
@@ -78,7 +78,7 @@ def _allowed_mapping(term_id: str, column: str):
     mapping = _allowed_mapping_data()
     value = mapping.get(term_id, {}).get(column) if mapping else get_table_value(
         download_lookup(f"{(download_hestia(term_id) or {}).get('termType')}.csv"),
-        'termid', term_id, column_name(column)
+        'term.id', term_id, column
     )
     return (value or _ALLOW_ALL).split(';') if isinstance(value, str) else _ALLOW_ALL
 
@@ -174,7 +174,7 @@ def is_in_system_boundary(term_id: str) -> bool:
     column = 'inHestiaDefaultSystemBoundary'
     value = mapping.get(term_id, {}).get(column) if mapping else get_table_value(
         download_lookup(f"{(download_hestia(term_id) or {}).get('termType')}.csv"),
-        'termid', term_id, column_name(column)
+        'term.id', term_id, column
     )
     # handle numpy bool from table value
     return not (not value)
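All three call sites in lookup_utils pick up the same rename and drop `column_name`; the surrounding logic is untouched, including the double negation on the last line, which coerces whatever the lookup cell yields into a plain Python bool. A standalone illustration with invented cell values (numpy remains available transitively through pandas):

import numpy as np

for cell in [np.bool_(True), np.bool_(False), 'true', '', None]:
    print(repr(cell), '->', not (not cell))
# prints plain Python bools: True, False, True, False, False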
@@ -1 +1 @@
-VERSION = '0.16.5'
+VERSION = '0.16.7'
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hestia_earth_utils
-Version: 0.16.5
+Version: 0.16.7
 Summary: HESTIA's utils library
 Home-page: https://gitlab.com/hestia-earth/hestia-utils
 Author: HESTIA Team
@@ -13,12 +13,8 @@ Requires-Dist: hestia-earth-schema>=35.0.1
 Requires-Dist: requests>=2.24.0
 Requires-Dist: urllib3~=1.26.0
 Requires-Dist: python-dateutil>=2.8.1
-Requires-Dist: numpy>=2
+Requires-Dist: pandas>=2
 Requires-Dist: flatten_json
-Provides-Extra: pivot-csv
-Requires-Dist: pandas>=2; extra == "pivot-csv"
-Provides-Extra: table
-Requires-Dist: pandas>=2; extra == "table"
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
@@ -26,7 +22,6 @@ Dynamic: description
 Dynamic: description-content-type
 Dynamic: home-page
 Dynamic: license
-Dynamic: provides-extra
 Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary
@@ -1,14 +1,13 @@
 hestia_earth/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hestia_earth/utils/api.py,sha256=y0gw5pCCHNnFIhM62Hok_5eDtH3QDAZdkye_1mANMNs,9654
-hestia_earth/utils/blank_node.py,sha256=1wc9zUkOvFhJS-YmuKexfIdYxfsp5KyJczLmHlW559Q,7375
+hestia_earth/utils/blank_node.py,sha256=kLjq8U0PYyq_SQ-VHGMll_3XxKdYEnHEwtCCglNT3vg,7350
 hestia_earth/utils/calculation_status.py,sha256=X7lbgVMD9luH1gj9lEcxd3_P2-u7e8ZPGCvX1czPZUo,2238
-hestia_earth/utils/csv_utils.py,sha256=BK-tci1sALmsxamSR1Y7f9O6ajTTdhggLC2pBEWhYME,2310
 hestia_earth/utils/cycle.py,sha256=rFLRL9X4KQ1UrE6fEPA_gV8KmwzrZpR3Ce56zg41lRk,1326
 hestia_earth/utils/date.py,sha256=SPQ69uxHiv1o3BqIkBKkM5XX_CmS20CB7g6u2rhsdh8,1807
 hestia_earth/utils/descriptive_stats.py,sha256=EMVwFvg2OnZgKRAfireAoWY2EbrSvqR0V0bK9B53p28,1583
 hestia_earth/utils/emission.py,sha256=BhBitooLTxZSh82S982v2QfPxxTF1kmGClG_uHyWdz4,1981
-hestia_earth/utils/lookup.py,sha256=XKmxFEH9o1Rhi4oTLteQnMAwNXiObjSX7pMfrUw8q1I,9522
-hestia_earth/utils/lookup_utils.py,sha256=_k3RZ1pK-gw7jq8wn9HrPWfDl4FlEWRb8bXmgaARu0w,6716
+hestia_earth/utils/lookup.py,sha256=Sea1EkwT1K4mb9eNQBkJHoXkvNLSg_N9eeNiUL6pLq0,8028
+hestia_earth/utils/lookup_utils.py,sha256=P3Ae2MqZWvk3f9AObNwk6Fq9AyyX279K4kR9qHX8rKQ,6667
 hestia_earth/utils/model.py,sha256=uUcrF07XmBzqLni8VSaP0HoebJnQ57kk0EOmhwYMbfI,4637
 hestia_earth/utils/pipeline.py,sha256=O-6DPtK0U1lJ51LFGa1gM6pjkBJUfxOjNjY8LxQPXV0,9588
 hestia_earth/utils/request.py,sha256=bu7hkWKmFdXl2_Feawiam_x32whlclA9oP0asJyC69k,626
@@ -16,7 +15,7 @@ hestia_earth/utils/stats.py,sha256=4t3op10xDJbGxWJEY1Jtyl302PYWyMFwLpsSkMlzQn8,3
 hestia_earth/utils/table.py,sha256=MOJDo5fQPRDogAty_UXbO9-EXFwz97m0f7--mOM17lQ,2363
 hestia_earth/utils/term.py,sha256=6LiUSc6KX3IOkfWF6fYkQ2tENCO8ENljcdDypxU6WtA,1060
 hestia_earth/utils/tools.py,sha256=9GaUJwxL-CTzEOGnRFkUQDVFelPevQSxXrf25vssCVo,4990
-hestia_earth/utils/version.py,sha256=RKa3Cna34LUgL3Ye-ubIdZ9B2mS9iFURrl2snrPa3uY,19
+hestia_earth/utils/version.py,sha256=izOjXE-oE9zdUdGeSgNJik6goDuxSRXghKlLPR0OuNE,19
 hestia_earth/utils/pivot/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hestia_earth/utils/pivot/_shared.py,sha256=JnyIOzpans45DE2hSa9-4yvNhq8t08lx1IAWGJi6WPQ,1397
 hestia_earth/utils/pivot/pivot_csv.py,sha256=7f6kMqeb1b3RKANLGeDgVu8G5WC-vXIijHnsJhO-CjI,12022
@@ -26,9 +25,9 @@ hestia_earth/utils/storage/_azure_client.py,sha256=sevCZni04eknMql2DgUsWG23f7u0K
 hestia_earth/utils/storage/_local_client.py,sha256=IbzziUKY0QS3ybHFfgEpELqvafa7hQnZ-DdGdjQuypE,515
 hestia_earth/utils/storage/_s3_client.py,sha256=B2yTsf-VfHcRLCKTMes4S_nCXxrZad9umyZx3b5Pu_c,3181
 hestia_earth/utils/storage/_sns_client.py,sha256=LowUatj78Egu6_Id6Rr7hZjfZx1WguS3lozB3yAwSps,347
-hestia_earth_utils-0.16.5.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
-hestia_earth_utils-0.16.5.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
-hestia_earth_utils-0.16.5.dist-info/METADATA,sha256=PAHew6LMon2UUdlJlGUSE7plh0VzfcBXXsV3SPzLL5A,2030
-hestia_earth_utils-0.16.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-hestia_earth_utils-0.16.5.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
-hestia_earth_utils-0.16.5.dist-info/RECORD,,
+hestia_earth_utils-0.16.7.data/scripts/hestia-format-upload,sha256=IhLAHHPJqRgUcht-M_EUEsRMbRbMfshig07o488zscM,703
+hestia_earth_utils-0.16.7.data/scripts/hestia-pivot-csv,sha256=0YBuGuyPO8rytod6iwWEKiQdSlr9JLuD001k6U5t6no,1163
+hestia_earth_utils-0.16.7.dist-info/METADATA,sha256=o6sR5_7DeeXBLuKWYMFmg0CWRg3O-Cynh6NVZkI1mC0,1869
+hestia_earth_utils-0.16.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+hestia_earth_utils-0.16.7.dist-info/top_level.txt,sha256=q0QxKEYx9uLpAD5ZtC7Ypq29smEPfOzEAn7Xv8XHGOQ,13
+hestia_earth_utils-0.16.7.dist-info/RECORD,,
@@ -1,84 +0,0 @@
-import io
-import csv
-import re
-import numpy as np
-
-_MISSING_VALUE = '-'
-_MISSING = -99999
-_DELIMITER = ','
-_QUOTE_CHAR = '"'
-ENCODING = 'ISO-8859-1'
-# default: " !#$%&'()*+,-./:;<=>?@[\\]^{|}~"
-_DELETE_CHARS = " !#$%&'()*,./:;<=>?@^{|}~"
-
-
-def is_missing_value(value): return value == _MISSING_VALUE or value == _MISSING or value == str(_MISSING)
-
-
-def _replace_missing_values(value: str): return str(_MISSING) if str(value) == _MISSING_VALUE else value
-
-
-def _replace_chars(value: str): return re.sub(f'[{re.escape(_DELETE_CHARS)}]', '', value.replace(' ', '_'))
-
-
-def _text_to_csv(csv_content: str):
-    return csv.reader(io.StringIO(csv_content.strip()), delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
-
-
-def _get_columns(csv_content: str):
-    try:
-        reader = _text_to_csv(csv_content)
-        names = next(reader)
-        return list(map(_replace_chars, names))
-    except StopIteration:
-        return []
-
-
-def _get_rows(csv_content: str):
-    string_io = io.StringIO(csv_content.strip())
-    try:
-        next(string_io)
-    except StopIteration:
-        return
-
-    return csv.reader(string_io, delimiter=_DELIMITER, quotechar=_QUOTE_CHAR)
-
-
-def _csv_str_to_recarray_chunks_numpy(csv_content: str, chunk_size: int = 5):
-    names = _get_columns(csv_content)
-    num_cols = len(names)
-
-    max_size = 1000
-    dtype = [(name, f"U{max_size}") for name in names]
-
-    reader = _get_rows(csv_content)
-
-    # 4. Process the file in batches
-    chunk_rows = []
-    for row in reader:
-        if not row:
-            continue
-        if len(row) != num_cols:
-            continue
-
-        # replace missing values
-        processed_row = tuple(_replace_missing_values(field) for field in row)
-        chunk_rows.append(processed_row)
-
-        if len(chunk_rows) >= chunk_size:
-            yield np.array(chunk_rows, dtype=dtype).view(np.recarray)
-            chunk_rows = []
-
-    if chunk_rows:
-        yield np.array(chunk_rows, dtype=dtype).view(np.recarray)
-
-
-def csv_str_to_recarray(csv_content: str) -> np.recarray:
-    array_rows = list(_csv_str_to_recarray_chunks_numpy(csv_content))
-    return np.hstack(array_rows).view(np.recarray)
-
-
-def csv_file_to_recarray(filepath: str):
-    with open(filepath, 'r', encoding=ENCODING) as f:
-        content = f.read()
-    return csv_str_to_recarray(content)
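The deleted module is the hand-rolled CSV-to-recarray pipeline that the pandas helpers in lookup.py replace: pd.read_csv now does the parsing and missing-value handling in one call. If downstream code still expects a recarray, pandas can produce one; a sketch with invented CSV content (not part of the package):

from io import StringIO
import pandas as pd

csv_content = "term.id,value\nterm1,1\nterm2,-\n"  # invented content
df = pd.read_csv(StringIO(csv_content), na_values=['-', ''])

rec = df.to_records(index=False)  # numpy.recarray, roughly what csv_str_to_recarray produced
print(rec.dtype.names)            # ('term.id', 'value'): raw headers, no _replace_chars renaming
print(rec['term.id'][0])          # access by string key; attribute access won't work with '.' in the name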