rand-engine 0.3.3__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,8 +1,7 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: rand-engine
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: Rand Engine v2. Package with some methods to generate random data in different formats. Great to mock data while testing or developing.
5
- Home-page: https://github.com/marcoaureliomenezes/rand_engine
6
5
  Author: marcoaureliomenezes
7
6
  Author-email: marcoaurelioreislima@gmail.com
8
7
  Requires-Python: >=3.10,<4.0
@@ -11,6 +10,8 @@ Classifier: Programming Language :: Python :: 3.10
11
10
  Classifier: Programming Language :: Python :: 3.11
12
11
  Classifier: Programming Language :: Python :: 3.12
13
12
  Classifier: Programming Language :: Python :: 3.13
13
+ Requires-Dist: fastavro (>=1.10.0,<2.0.0)
14
+ Requires-Dist: fastparquet (>=2024.11.0,<2025.0.0)
14
15
  Requires-Dist: numpy (>=2.1.1,<3.0.0)
15
16
  Requires-Dist: pandas (>=2.2.2,<3.0.0)
16
17
  Project-URL: Repository, https://github.com/marcoaureliomenezes/rand_engine
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "rand-engine"
3
- version = "0.3.3"
3
+ version = "0.3.5"
4
4
  description = "Rand Engine v2. Package with some methods to generate random data in different formats. Great to mock data while testing or developing."
5
5
  authors = ["marcoaureliomenezes <marcoaurelioreislima@gmail.com>"]
6
6
  repository = "https://github.com/marcoaureliomenezes/rand_engine"
@@ -10,7 +10,8 @@ readme = "README.md"
10
10
  python = "^3.10"
11
11
  numpy = "^2.1.1"
12
12
  pandas = "^2.2.2"
13
-
13
+ fastavro = "^1.10.0"
14
+ fastparquet = "^2024.11.0"
14
15
 
15
16
  [tool.poetry.group.test.dependencies]
16
17
  pytest = "^8.3.3"
@@ -6,7 +6,13 @@ from datetime import datetime as dt, timedelta
6
6
  class DatetimeCore:
7
7
 
8
8
  @classmethod
9
- def gen_unix_timestamps(self, size: int, start: str, end: str, format: str):
9
+ def gen_unix_timestamps(self, size: int, start: str, end: str, format: str) -> np.ndarray:
10
+ """
11
+ This method generates an array of random unix timestamps.
12
+ :param size: int: Number of elements to be generated.
13
+ :param start: str: Start date of the generated timestamps.
14
+ :param end: str: End date of the generated timestamps.
15
+ :param format: str: Format of the input dates."""
10
16
  dt_start, dt_end = dt.strptime(start, format), dt.strptime(end, format)
11
17
  if dt_start < dt(1970, 1, 1): dt_start = dt(1970, 1, 1)
12
18
  timestamp_start, timestamp_end = dt_start.timestamp(), dt_end.timestamp()
@@ -15,7 +21,15 @@ class DatetimeCore:
15
21
 
16
22
 
17
23
  @classmethod
18
- def gen_timestamps(self, size: int, start: str, end: str, format: str):
24
+ def gen_timestamps(self, size: int, start: str, end: str, format: str) -> np.ndarray:
25
+ """
26
+ This method generates an array of random timestamps.
27
+ :param size: int: Number of elements to be generated.
28
+ :param start: str: Start date of the generated timestamps.
29
+ :param end: str: End date of the generated timestamps.
30
+ :param format: str: Format of the input dates.
31
+ :return: np.ndarray: Array of random timestamps."""
32
+
19
33
  date_array = self.gen_unix_timestamps(size, start, end, format).astype('datetime64[s]')
20
34
  return date_array
21
35
 
@@ -25,16 +39,7 @@ class DatetimeCore:
25
39
  timestamp_array = self.gen_unix_timestamps(size, start, end, format_in)
26
40
  return [dt.fromtimestamp(i).strftime(format_out) for i in timestamp_array]
27
41
 
28
- if __name__ == '__main__':
29
-
30
- pass
31
42
 
32
43
 
33
- # def format_date_array(date_array, format):
34
- # return [datetime.fromtimestamp(i).strftime(format) for i in date_array]
35
- # def handle_format(format):
36
- # return format[randint(0, len(format))] if format == list else \
37
- # format if format == str else "%d-%m-%Y"
38
- # def get_interval(start, end, date_format):
39
- # return datetime.timestamp(datetime.strptime(start, date_format)), \
40
- # datetime.timestamp(datetime.strptime(end, date_format))
44
+ if __name__ == '__main__':
45
+ pass
@@ -7,15 +7,36 @@ class DistinctCore:
7
7
 
8
8
  @classmethod
9
9
  def gen_distincts_typed(self, size: int, distinct: List[Any]) -> np.ndarray:
10
+ """
11
+ This method generates an array of random distinct values.
12
+ :param size: int: Number of elements to be generated.
13
+ :param distinct: List[Any]: List of distinct values to be generated.
14
+ :return: np.ndarray: Array of random distinct values.
15
+ """
10
16
  assert len(list(set([type(x) for x in distinct]))) == 1
11
17
  return np.random.choice(distinct, size)
12
18
 
19
+
13
20
  @classmethod
14
- def gen_distincts_untyped(self, size: int, distinct: List[Any]) -> Iterator:
21
+ def gen_distincts_untyped(self, size: int, distinct: List[Any]) -> List[Any]:
22
+ """
23
+ This method generates an array of random distinct values.
24
+ :param size: int: Number of elements to be generated.
25
+ :param distinct: List[Any]: List of distinct values to be generated.
26
+ :return: Iterator: Iterator of random distinct values.
27
+ """
15
28
  return list(map(lambda x: distinct[x], np.random.randint(0, len(distinct), size)))
16
29
 
17
30
  @classmethod
18
31
  def gen_complex_distincts(self, size: int, pattern="x.x.x-x", replacement="x", templates=[]):
32
+ """
33
+ This method generates an array of random distinct values.
34
+ :param size: int: Number of elements to be generated.
35
+ :param pattern: str: Pattern to be replaced.
36
+ :param replacement: str: Replacement of the pattern.
37
+ :param templates: List[Dict]: List of dictionaries containing the method and parameters to be used in the replacement.
38
+ :return: np.ndarray: Array of random distinct values.
39
+ """
19
40
  assert pattern.count(replacement) == len(templates)
20
41
  list_of_lists, counter = [], 0
21
42
  for replacer_cursor in range(len(pattern)):
@@ -27,6 +48,7 @@ class DistinctCore:
27
48
  return reduce(lambda a, b: a.astype('str') + b.astype('str'), list_of_lists)
28
49
 
29
50
 
51
+
30
52
  if __name__ == '__main__':
31
53
  pass
32
54
 
@@ -6,10 +6,19 @@ class DistinctUtils:
6
6
 
7
7
  @classmethod
8
8
  def handle_distincts_lvl_1(self, distinct_prop, precision):
9
+ """
10
+ This method generates a list of distinct values based on a dictionary of distinct values and their respective frequencies.
11
+ :param distinct_prop: dict: Dictionary containing the distinct values and their respective frequencies.
12
+ :param precision: int: Precision of the distinct values.
13
+ :return: List: List of distinct values.
14
+ """
9
15
  return [ key for key, value in distinct_prop.items() for i in range(value * precision )]
10
16
 
11
17
  @classmethod
12
18
  def handle_distincts_lvl_2(self, distincts, sep=";"):
19
+ """
20
+ This method generates a list of distinct values based on a dictionary of distinct values and their respective frequencies.
21
+ :param distincts: dict: Dictionary containing the distinct values and their respective frequencies."""
13
22
  data_flatted = [f"{j}{sep}{i}" for j in distincts for i in distincts[j]]
14
23
  return data_flatted
15
24
 
@@ -39,16 +48,12 @@ if __name__ == '__main__':
39
48
  distinct_2 = {"OPC": [{"C_OPC": ["PF", "PJ"]}, {"V_OPC": ["NA"]}], "SWP": [{"C_SWP": ["AP"]}, {"V_SWP": ["MA", "ME"]}]}
40
49
  #print(DistinctUtils.handle_distincts_lvl_5(distinct_2)[]
41
50
 
42
-
43
51
  def rec(structure):
44
52
  if isinstance(structure, list):
45
53
  return [rec(i) for i in structure]
46
54
  if isinstance(structure, dict):
47
55
  return [[[k], rec(v)] for k, v in structure.items()]
48
56
  return structure
49
-
50
57
  import numpy as np
51
-
52
58
  result = rec(distinct_2)
53
-
54
59
  combinations = np.array(list(itertools.product(*result)))
@@ -0,0 +1,62 @@
1
+ import numpy as np
2
+
3
+
4
+ class NumericCore:
5
+
6
+
7
+ @classmethod
8
+ def gen_ints(self, size: int, min: int, max: int) -> np.ndarray:
9
+ """
10
+ This method generates an array of random integers.
11
+ :param size: int: Number of elements to be generated.
12
+ :param min: int: Minimum value of the generated numbers.
13
+ :param max: int: Maximum value of the generated numbers.
14
+ :return: np.ndarray: Array of random integers.
15
+ """
16
+ return np.random.randint(min, max + 1, size)
17
+
18
+
19
+ @classmethod
20
+ def gen_ints_zfilled(self, size: int, length: int) -> np.ndarray:
21
+ """
22
+ This method generates an array of random integers with a fixed length.
23
+ :param size: int: Number of elements to be generated.
24
+ :param length: int: Length of the generated numbers.
25
+ :return: np.ndarray: Array of random integers.
26
+ """
27
+ str_arr = np.random.randint(0, 10**length, size).astype('str')
28
+ return np.char.zfill(str_arr, length)
29
+
30
+
31
+ @classmethod
32
+ def gen_floats(self, size: int, min: int, max: int, round: int = 2):
33
+ """
34
+ This method generates an array of random floats.
35
+ :param size: int: Number of elements to be generated.
36
+ :param min: int: Minimum value of the generated numbers.
37
+ :param max: int: Maximum value of the generated numbers.
38
+ :param round: int: Number of decimal places to round the generated numbers. Default is 2.
39
+ :return: np.ndarray: Array of random floats.
40
+ """
41
+ sig_part = np.random.randint(min, max, size)
42
+ decimal = np.random.randint(0, 10 ** round, size)
43
+ return sig_part + (decimal / 10 ** round) if round > 0 else sig_part
44
+
45
+
46
+ @classmethod
47
+ def gen_floats_normal(self, size: int, mean: int, std: int, round: int = 2):
48
+ """
49
+ This method generates an array of random floats with a normal distribution.
50
+ :param size: int: Number of elements to be generated.
51
+ :param mean: int: Mean of the distribution.
52
+ :param std: int: Standard deviation of the distribution.
53
+ :param round: int: Number of decimal places to round the generated numbers. Default is 2.
54
+ :return: np.ndarray: Array of random floats.
55
+ """
56
+ return np.round(np.random.normal(mean, std, size), round)
57
+
58
+
59
+
60
+
61
+ if __name__ == '__main__':
62
+ pass
@@ -0,0 +1,88 @@
1
+ import os
2
+ import time
3
+ import pandas as pd
4
+ from typing import List, Dict, Optional, Generator, Callable, Any
5
+ from rand_engine.main.file_writer import FileWriter
6
+ from rand_engine.main.stream_handle import StreamHandle
7
+
8
+ class DataGenerator:
9
+
10
+ def __init__(self, random_spec):
11
+ self.random_spec = random_spec
12
+ self.actual_dataframe = None
13
+
14
+
15
+ def handle_splitable(self, metadata, df):
16
+ for key, value in metadata.items():
17
+ if value.get("splitable"):
18
+ sep = value.get("sep", ";")
19
+ cols = value.get("cols")
20
+ df[cols] = df[key].str.split(sep, expand=True)
21
+ df.drop(columns=[key], inplace=True)
22
+ return df
23
+
24
+
25
+ def generate_pandas_df(self, size: int, transformer: Optional[Callable]=None) -> pd.DataFrame:
26
+ """
27
+ This method generates a pandas DataFrame based on random data specified in the metadata parameter.
28
+ :param size: int: Number of rows to be generated.
29
+ :param transformer: Optional[Callable]: Function to transform the generated data.
30
+ :return: pd.DataFrame: DataFrame with the generated data.
31
+ """
32
+ assert type(self.random_spec) is dict, "You need to pass a random_spec parameter to generate the random data."
33
+ def first_level():
34
+ dict_data = {key: value["method"](size, **value["parms"]) for key, value in self.random_spec.items()}
35
+ df_pandas = pd.DataFrame(dict_data)
36
+ df_pandas = self.handle_splitable(self.random_spec, df_pandas)
37
+ if transformer: df_pandas = transformer(df_pandas)
38
+ return df_pandas
39
+ self.actual_dataframe = first_level
40
+ return self
41
+
42
+
43
+ def generate_spark_df(self, spark, size: int, transformer: Optional[Callable]=None) -> Any:
44
+ """
45
+ This method generates a Spark DataFrame based on random data specified in the random_spec parameter.
46
+ :param spark: SparkSession: SparkSession object.
47
+ :param size: int: Number of rows to be generated.
48
+ :param transformer: Optional[Callable]: Function to transform the generated data."""
49
+ def second_level():
50
+ self.generate_pandas_df(size=size, transformer=transformer)
51
+ df_spark = spark.createDataFrame(self.actual_dataframe)
52
+ return df_spark
53
+ self.actual_dataframe = second_level
54
+ return self
55
+
56
+
57
+ def get_df(self):
58
+ assert self.actual_dataframe is not None, "You need to generate a DataFrame first."
59
+ return self.actual_dataframe()
60
+
61
+
62
+ def stream_dict(self, min_throughput: int=1, max_throughput: int = 10) -> Generator:
63
+ """
64
+ This method creates a generator of records to be used in a streaming context.
65
+ :param min_throughput: int: Minimum throughput to be generated.
66
+ :param max_throughput: int: Maximum throughput to be generated.
67
+ :return: Generator: Generator of records.
68
+ """
69
+ assert self.actual_dataframe is not None, "You need to generate a DataFrame first."
70
+ while True:
71
+ df_data_microbatch = self.actual_dataframe()
72
+ df_data_parsed = StreamHandle.convert_dt_to_str(df_data_microbatch)
73
+ list_of_records = df_data_parsed.to_dict('records')
74
+ for record in list_of_records:
75
+ record["timestamp_created"] = round(time.time(), 3)
76
+ yield record
77
+ StreamHandle.sleep_to_contro_throughput(min_throughput, max_throughput)
78
+
79
+
80
+ def write(self):
81
+ microbatch_def = lambda: self.actual_dataframe
82
+ return FileWriter(microbatch_def)
83
+
84
+
85
+
86
+ if __name__ == '__main__':
87
+
88
+ pass
@@ -0,0 +1,131 @@
1
+ import os
2
+ from typing import Callable
3
+
4
+
5
+ class FileWriter:
6
+
7
+
8
+ def __init__(self, microbatch_def):
9
+ self.microbatch_def = microbatch_def
10
+ self.write_format = "csv"
11
+ self.write_mode = "overwrite"
12
+ self.write_options = {}
13
+ self.dict_format = {
14
+ "csv": self.to_csv,
15
+ "parquet": self.to_parquet
16
+ }
17
+
18
+ def __handle_fs(self, path, flag=True) -> None:
19
+ """
20
+ This method handles the file system operations.
21
+ :param path: str: Path of the file to be written.
22
+ """
23
+ if self.write_mode == "overwrite":
24
+ try:
25
+ if os.path.exists(path):
26
+ for file in os.listdir(path):
27
+ os.remove(os.path.join(path, file))
28
+ except Exception as e: pass
29
+ if flag == True: to_create = os.path.dirname(path)
30
+ else: to_create = path
31
+ os.makedirs(to_create, exist_ok=True)
32
+
33
+
34
+ def __get_dir_size(self, folder_path: str) -> int:
35
+ """
36
+ This method calculates the size in bytes of a directory.
37
+ :param folder_path: str: Path of the directory.
38
+ :return: int: Size of the directory in bytes.
39
+ """
40
+ total_size = 0
41
+ for dirpath, dirnames, filenames in os.walk(folder_path):
42
+ for filename in filenames:
43
+ file_path = os.path.join(dirpath, filename)
44
+ if not os.path.islink(file_path):
45
+ total_size += os.path.getsize(file_path)
46
+ return total_size
47
+
48
+
49
+ def mode(self, write_mode: str) -> Callable:
50
+ """
51
+ This method sets the write mode of the file.
52
+ :param write_mode: str: Write mode of the file. Default is overwrite.
53
+ :return: FileWriter: Instance of the fileWriter class for method chaining.
54
+ """
55
+ self.write_mode = write_mode
56
+ return self
57
+
58
+
59
+ def format(self, format):
60
+ """
61
+ This method sets the write format of the file.
62
+ :param format: str: Write format of the file. Default is csv. Supported formats are csv and parquet.
63
+ :return: FileWriter: Instance of the fileWriter class for method chaining.
64
+ """
65
+ self.write_format = format
66
+ return self
67
+
68
+
69
+ def option(self, key, value):
70
+ """
71
+ This method sets the write options of the file.
72
+ :param key: str: Key of the write option.
73
+ :param value: Any: Value of the write option.
74
+ :return: FileWriter: Instance of the fileWriter class for method chaining
75
+ """
76
+ self.write_options[key] = value
77
+ return self
78
+
79
+
80
+ def to_csv(self, dataframe, full_path) -> Callable:
81
+ """
82
+ This method writes a pandas DataFrame to a csv file.
83
+ :param dataframe: pd.DataFrame: DataFrame to be written.
84
+ :param full_path: str: Full path of the file to be written.
85
+ :return: Callable: Function to write the Pandas DataFrame to a csv file.
86
+ """
87
+ if self.write_options.get("compression"):
88
+ full_path= full_path.replace("csv", f"csv.{self.write_options['compression']}")
89
+ writer = lambda: dataframe().to_csv(full_path, index=False, **self.write_options)
90
+ return writer
91
+
92
+
93
+ def to_parquet(self, dataframe, full_path):
94
+ """
95
+ This method writes a pandas DataFrame to a parquet file.
96
+ :param dataframe: pd.DataFrame: DataFrame to be written.
97
+ :param full_path: str: Full path of the file to be written.
98
+ :return: Callable: Function to write the Pandas DataFrame to a parquet file.
99
+ """
100
+ if self.write_options.get("compression"):
101
+ full_path= full_path.replace(".parquet", f".{self.write_options['compression']}.parquet")
102
+ writer = lambda: dataframe().to_parquet(full_path, index=False, engine='pyarrow', **self.write_options)
103
+ return writer
104
+
105
+
106
+ def load(self, path: str) -> None:
107
+ """
108
+ This method writes a pandas DataFrame to a file.
109
+ :param path: str: Path of the file to be written.
110
+ """
111
+ self.__handle_fs(path)
112
+ dataframe = self.microbatch_def()
113
+ self.dict_format[self.write_format](dataframe, path)()
114
+
115
+
116
+ def incr_load(self, path, size_in_mb=4):
117
+ """
118
+ This method writes a pandas DataFrame to a file in incremental mode.
119
+ :param path: str: Path of the file to be written.
120
+ :param size_in_mb: int: Size in MB of the file to be written.
121
+ """
122
+ self.__handle_fs(path, flag=False)
123
+ counter = 0
124
+ while True:
125
+ full_path = f"{path}/part-{str(counter).zfill(6)}.{self.write_format}"
126
+ dataframe = self.microbatch_def()
127
+ self.dict_format[self.write_format](dataframe, full_path)()
128
+ size_bytes = self.__get_dir_size(path)
129
+ if counter % 100 == 0: print(f"Size: {size_bytes/2**20:.2f} MB")
130
+ if self.__get_dir_size(path) >= size_in_mb*2**20: break
131
+ counter += 1
@@ -0,0 +1,18 @@
1
+ import random
2
+ import time
3
+ import pandas as pd
4
+
5
+ class StreamHandle:
6
+
7
+ @staticmethod
8
+ def convert_dt_to_str(dataframe: pd.DataFrame) -> pd.DataFrame:
9
+ df_result = dataframe.copy()
10
+ for column in df_result.columns:
11
+ if 'datetime64' in str(df_result[column].dtype):
12
+ df_result[column] = df_result[column].astype(str)
13
+ return df_result
14
+
15
+ @staticmethod
16
+ def sleep_to_contro_throughput(min_throughput: int, max_throughput: int):
17
+ sleep_time = 1 / random.uniform(min_throughput, max_throughput)
18
+ time.sleep(sleep_time)
@@ -1,33 +0,0 @@
1
- import numpy as np
2
-
3
-
4
- class NumericCore:
5
-
6
-
7
- @classmethod
8
- def gen_ints(self, size: int, min: int, max: int):
9
- return np.random.randint(min, max + 1, size)
10
-
11
-
12
- @classmethod
13
- def gen_ints_zfilled(self, size: int, length: int) -> np.ndarray:
14
- str_arr = np.random.randint(0, 10**length, size).astype('str')
15
- return np.char.zfill(str_arr, length)
16
-
17
-
18
- @classmethod
19
- def gen_floats(self, size: int, min: int, max: int, round: int = 2):
20
- sig_part = np.random.randint(min, max, size)
21
- decimal = np.random.randint(0, 10 ** round, size)
22
- return sig_part + (decimal / 10 ** round) if round > 0 else sig_part
23
-
24
-
25
- @classmethod
26
- def gen_floats_normal(self, size: int, mean: int, std: int, round: int = 2):
27
- return np.round(np.random.normal(mean, std, size), round)
28
-
29
-
30
-
31
-
32
- if __name__ == '__main__':
33
- pass
@@ -1,72 +0,0 @@
1
- import os
2
- import time
3
- import random
4
- import pandas as pd
5
- from typing import List, Dict, Optional, Generator, Callable, Any
6
-
7
-
8
- class DataGenerator:
9
-
10
-
11
- def handle_splitable(self, metadata, df):
12
- for key, value in metadata.items():
13
- if value.get("splitable"):
14
- sep = value.get("sep", ";")
15
- cols = value.get("cols")
16
- df[cols] = df[key].str.split(sep, expand=True)
17
- df.drop(columns=[key], inplace=True)
18
- return df
19
-
20
-
21
- def __convert_dt_to_str(self, dataframe: pd.DataFrame) -> pd.DataFrame:
22
- df_result = dataframe.copy()
23
- for column in df_result.columns:
24
- if 'datetime64' in str(df_result[column].dtype):
25
- df_result[column] = df_result[column].astype(str)
26
- return df_result
27
-
28
-
29
- def __sleep_to_contro_throughput(self, min_throughput: int, max_throughput: int):
30
- sleep_time = 1 / random.uniform(min_throughput, max_throughput)
31
- time.sleep(sleep_time)
32
-
33
-
34
- def create_pandas_df(self, size: int, metadata: dict, transformer: Optional[Callable]=None) -> pd.DataFrame:
35
- dict_data = {key: value["method"](size, **value["parms"]) for key, value in metadata.items()}
36
- df_data = pd.DataFrame(dict_data)
37
- df_data_final = self.handle_splitable(metadata, df_data)
38
- if transformer: df_data_final = transformer(df_data_final)
39
- return df_data_final
40
-
41
-
42
- def create_spark_df(self, spark, size: int, metadata: dict, transformer: Optional[Callable]=None) -> Any:
43
- df_data = self.create_pandas_df(size=size, metadata=metadata, transformer=transformer)
44
- df_final = spark.createDataFrame(df_data)
45
- return df_final
46
-
47
-
48
- def create_streaming_records(self, microbatch_size:int, metadata: dict, transformer: Optional[Callable]=None, min_throughput: int=1, max_throughput: int = 10) -> Generator:
49
- while True:
50
- df_data_microbatch = self.create_pandas_df(size=microbatch_size, metadata=metadata, transformer=transformer)
51
- df_data_parsed = self.__convert_dt_to_str(df_data_microbatch)
52
- list_of_records = df_data_parsed.to_dict('records')
53
- for record in list_of_records:
54
- record["timestamp_created"] = round(time.time(), 3)
55
- yield record
56
- self.__sleep_to_contro_throughput(min_throughput, max_throughput)
57
-
58
-
59
- def create_csv_file(self, microbatch_size: int, size_in_mb: int, metadata: dict, path: str, transformer: Optional[Callable]=None) -> None:
60
- os.makedirs(os.path.dirname(path), exist_ok=True)
61
- while True:
62
- df = self.create_pandas_df(size=microbatch_size, metadata=metadata, transformer=transformer)
63
- df.to_csv(path, mode='a', header=False, index=False)
64
- if os.path.getsize(path) > size_in_mb * 1024 * 1024: break
65
-
66
-
67
-
68
-
69
-
70
-
71
- if __name__ == '__main__':
72
- pass
File without changes