pokerdf 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pokerdf/__init__.py ADDED
File without changes
@@ -0,0 +1,254 @@
1
+ import os
2
+ import pandas as pd
3
+ from typing import List
4
+ from joblib import Parallel, delayed
5
+ from pokerdf.validation.pydantic_modules import ValidateInput
6
+ from pokerdf.utils.strings import PLATFORM
7
+ from pokerdf.regex.regex_execution import (
8
+ capture_common_data,
9
+ capture_general_data_of_the_hand,
10
+ capture_specific_data_of_the_player,
11
+ r,
12
+ )
13
+ import warnings
14
+
15
+ warnings.simplefilter(action="ignore", category=FutureWarning)
16
+
17
+
18
def get_files_paths(path: str) -> List[str]:
    """
    List the hand history files located directly inside a directory.

    Args:
        path (str): Directory to scan.

    Returns:
        List[str]: ``path`` joined with the name of every entry whose name
        starts with "HH" and ends with ".txt", ordered by entry name.
    """
    # Sorting the names first keeps the output ordered; the filter keeps
    # only the hand history files.
    return [
        os.path.join(path, entry)
        for entry in sorted(os.listdir(path))
        if entry.startswith("HH") and entry.endswith(".txt")
    ]
45
+
46
+
47
def compose_dataframe() -> pd.DataFrame:
    """
    Build the empty table that defines the output column layout.

    Returns:
        pd.DataFrame: A DataFrame with no rows and the predefined columns,
        in the order listed below.
    """
    column_names = [
        "Modality",
        "TableSize",
        "BuyIn",
        "TournID",
        "TableID",
        "HandID",
        "LocalTime",
        "Level",
        "Ante",
        "Blinds",
        "Owner",
        "OwnersHand",
        "Playing",
        "Player",
        "Seat",
        "PostedAnte",
        "Position",
        "PostedBlind",
        "Stack",
        "PreflopAction",
        "FlopAction",
        "TurnAction",
        "RiverAction",
        "AnteAllIn",
        "PreflopAllIn",
        "FlopAllIn",
        "TurnAllIn",
        "RiverAllIn",
        "BoardFlop",
        "BoardTurn",
        "BoardRiver",
        "ShowDown",
        "CardCombination",
        "Result",
        "Balance",
        "FinalRank",
        "Prize",
    ]
    return pd.DataFrame(data=None, columns=column_names)
98
+
99
+
100
def apply_regex(txt: str) -> pd.DataFrame:
    """
    Parse the text of a hand history file into one row per player per hand.

    The text is split on ``PLATFORM`` followed by a space to isolate each
    hand. Tournament-wide values are captured once, from the first hand;
    hand-level values are captured for every hand and player-level values
    for every player returned by ``r.get_players``.

    Args:
        txt (str): The text content of the poker hand history file.

    Returns:
        pd.DataFrame: The parsed rows, with the columns of
        ``compose_dataframe()`` first. The index is not reset.

    Raises:
        ValueError: If ``txt`` contains no hands.
    """
    # Split the tournament into hands, dropping empty chunks and the chunk
    # that holds nothing but the byte-order mark.
    hands = [
        chunk
        for chunk in txt.split(f"{PLATFORM} ")
        if chunk and chunk != "\ufeff"
    ]
    if not hands:
        raise ValueError("No hands found in the provided hand history text.")

    # Capture common info about the tournament from the first hand
    common = capture_common_data(hands[0].split("\n*** "))

    # The empty template goes first so its column order drives the result.
    # Frames are gathered and concatenated once at the end: concatenating
    # inside the loop re-copies every accumulated row for each new player.
    frames = [compose_dataframe()]

    for hand in hands:
        # Split hand in stages (pre-flop/flop/turn/river)
        stages = hand.split("\n*** ")

        # Capture general info of the hand
        general = capture_general_data_of_the_hand(stages)

        for player in r.get_players(stages):
            # Capture specific info of the player's actions
            specific = capture_specific_data_of_the_player(stages, player)

            # Combine collected info (later dicts win on duplicate keys)
            collected_data = {**common, **general, **specific}

            # Validation step; presumably raises on malformed values.
            # NOTE(review): confirm against ValidateInput's definition.
            ValidateInput(**collected_data)

            frames.append(pd.DataFrame(collected_data))

    return pd.concat(frames)
156
+
157
+
158
def convert_txt_to_tabular_data(path: str) -> pd.DataFrame:
    """
    Read a hand history text file and parse it into a DataFrame.

    Args:
        path (str): Location of the .txt file holding the hand history.

    Returns:
        pd.DataFrame: Whatever ``apply_regex`` produces for the file's text.
    """
    # Bytes that are not valid UTF-8 are replaced rather than raising.
    with open(path, mode="r", encoding="utf-8", errors="replace") as handle:
        return apply_regex(handle.read())
173
+
174
+
175
def _save_log(msg: str, destination: str, file_name: str) -> None:
    """
    Append a log message, followed by a newline, to a file.

    The file is created if it does not exist yet.

    Args:
        msg (str): The message to be logged.
        destination (str): The folder where the log file will be saved.
        file_name (str): The name of the log file.
    """
    # Compose path of the log
    path = os.path.join(destination, file_name)

    # The context manager closes the file even if the write fails. UTF-8 is
    # set explicitly so messages with non-ASCII characters (file names,
    # exception text) do not depend on the locale's default encoding.
    with open(path, "a", encoding="utf-8") as file:
        file.write(msg + "\n")
195
+
196
+
197
class DataProcessing:
    """
    Process and save a poker hand history file, logging the result.

    Args:
        path (str): The path to the hand history file.
        destination (str): The directory where the processed data will be saved.
    """

    def __init__(self, path: str, destination: str) -> None:
        self.path = path
        self.destination = destination

    def run(self) -> None:
        """
        Convert the file to a .parquet table and record the outcome.

        On success the table is written to ``destination`` and a DONE line
        is appended to "success.txt". Any exception raised while converting
        or saving is caught: a FAIL line with the error text is appended to
        "fail.txt" instead. Both messages are also printed.
        """
        # os.path.basename honours the platform's path separators, whereas
        # splitting on "/" leaves the whole path in the message on Windows.
        source_name = os.path.basename(self.path)

        try:

            # Convert text to pd.DataFrame
            df = convert_txt_to_tabular_data(self.path).reset_index(drop=True)

            # Without rows there is no LocalTime/TournID to name the output;
            # fail with a readable reason instead of an opaque lookup error.
            if df.empty:
                raise ValueError("no rows could be parsed from the file")

            # Compose name of the .parquet file: the first 8 characters of
            # the first LocalTime value (dashes removed) + the Tournament ID
            clean_datetime = str(df.LocalTime[0]).replace("-", "")[:8]
            file_name = clean_datetime + "-T" + str(df.TournID[0]) + ".parquet"

            # Path to save the file
            destination_path = os.path.join(self.destination, file_name)

            # Save the table
            df.to_parquet(destination_path, index=False)

            # Log / print DONE status
            msg = " DONE: " + source_name
            _save_log(msg, self.destination, "success.txt")
            print(msg)

        except Exception as e:
            # Deliberate catch-all: one bad file must not abort the batch.

            # Log / print FAIL status
            msg = " FAIL: " + source_name
            msg += " (" + str(e) + ")"
            _save_log(msg, self.destination, "fail.txt")
            print(msg)
241
+
242
+
243
def execute_in_parallel(source: str, destination: str) -> None:
    """
    Run one DataProcessing job per hand history file, using multiple cores.

    Args:
        source (str): Directory scanned by ``get_files_paths``.
        destination (str): Directory handed to every DataProcessing job.
    """
    # One delayed ``run`` call per file found in the source directory
    jobs = (
        delayed(DataProcessing(file_path, destination).run)()
        for file_path in get_files_paths(source)
    )

    # Dispatch the jobs through joblib
    runner = Parallel(n_jobs=-1)
    runner(jobs)
pokerdf/main.py ADDED
@@ -0,0 +1,79 @@
1
+ import os
2
+ import sys
3
+ import datetime
4
+
5
+ from pokerdf.core.read_and_convert import execute_in_parallel
6
+
7
+
8
def main() -> None:
    """
    Main function to process command line arguments and execute the 'convert' command.

    - Checks if the command is 'convert'.
    - Validates that the source path exists, is a non-empty directory and
      contains at least one ".txt" file.
    - Generates a session ID and creates the destination folder
      "./output/<session_id>".
    - Executes the pipeline function `execute_in_parallel` to process files
      from source to destination, then prints the elapsed time.

    Raises:
        SystemExit: If there are not enough arguments, if an invalid command
            is provided, or if the source path fails validation.
    """

    if len(sys.argv) < 3:
        print("Usage: pokerdf convert <path>")
        sys.exit(1)

    command = sys.argv[1]
    source_path = sys.argv[2]

    # An unknown command is an error: exit with a non-zero status, as the
    # documented contract states, so shells and scripts can detect it.
    if command != "convert":
        print(f"The command '{command}' does not exist.")
        sys.exit(1)

    # Check if the source path exists
    if not os.path.exists(source_path):
        print(f"The source path '{source_path}' does not exist.")
        sys.exit(1)
    # Check if the source path is a directory
    if not os.path.isdir(source_path):
        print(f"The source path '{source_path}' is not a directory.")
        sys.exit(1)

    # List the directory once and reuse the listing for both checks below
    entries = os.listdir(source_path)

    # Check if the source path is empty
    if not entries:
        print(f"The source path '{source_path}' is empty.")
        sys.exit(1)
    # Check if the source path holds at least one candidate .txt file
    if not any(entry.endswith(".txt") for entry in entries):
        print(
            f"The source path '{source_path}' does not contain any poker hand history files."
        )
        sys.exit(1)

    # Get start time
    start_time = datetime.datetime.now()

    # Generate session ID from the start time
    session_id = start_time.strftime("%Y%m%d-%H%M%S")

    # Generate destination path
    destination_path = f"./output/{session_id}"

    # Create folder
    os.makedirs(destination_path)

    # Execute pipeline
    execute_in_parallel(source=source_path, destination=destination_path)

    # Get the elapsed time in hours, minutes, and seconds
    elapsed_time = datetime.datetime.now() - start_time
    hours, remainder = divmod(elapsed_time.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)

    # Print the completed time in a readable format
    print(
        f"Processing completed in {int(hours)} hours, {int(minutes)} minutes, and {int(seconds)} seconds."
    )


if __name__ == "__main__":
    main()
@@ -0,0 +1,89 @@
1
+ from typing import Any
2
+ from pokerdf.regex.regex_patterns import RegexPatterns
3
+
4
# Shared RegexPatterns instance used by the capture_* helpers below
r = RegexPatterns()


def capture_common_data(hand: list[str]) -> dict[str, Any]:
    """
    Collect the tournament-wide values from a hand.

    Args:
        hand (list): List of texts from a specific hand.

    Returns:
        dict: The captured values under the keys "Modality", "TableSize",
        "BuyIn", "TournID" and "Owner".
    """
    return {
        "Modality": r.get_modality(hand),
        "TableSize": r.get_table_size(hand),
        "BuyIn": r.get_buyin(hand),
        "TournID": r.get_tourn_id(hand),
        "Owner": r.get_owner(hand),
    }
25
+
26
+
27
def capture_general_data_of_the_hand(splitted_hand: list[str]) -> dict[str, Any]:
    """
    Collect the values that describe a hand as a whole.

    Args:
        splitted_hand (list): List of texts from a specific hand.

    Returns:
        dict: The captured values, keyed by output column name (identifiers,
        time, level, ante, blinds, owner's cards, number of active players
        and the flop/turn/river boards).
    """
    return {
        "HandID": r.get_hand_id(splitted_hand),
        "TableID": r.get_table_id(splitted_hand),
        "LocalTime": r.get_time(splitted_hand),
        "Level": r.get_level(splitted_hand),
        "Ante": r.get_ante(splitted_hand),
        "Blinds": r.get_blinds(splitted_hand),
        "OwnersHand": r.get_owner_cards(splitted_hand),
        "Playing": r.get_number_of_active_players(splitted_hand),
        # One board lookup per street, selected by its section marker
        "BoardFlop": r.get_board(splitted_hand, stage="FLOP ***"),
        "BoardTurn": r.get_board(splitted_hand, stage="TURN ***"),
        "BoardRiver": r.get_board(splitted_hand, stage="RIVER ***"),
    }
51
+
52
+
53
def capture_specific_data_of_the_player(
    splitted_hand: list[str], player: str
) -> dict[str, Any]:
    """
    Collect the values that describe one player within a hand.

    Args:
        splitted_hand (list): List of texts from a specific hand.
        player (str): Name of the player.

    Returns:
        dict: The captured values, keyed by output column name. "Player"
        holds the name wrapped in a one-element list.
    """
    return {
        "Player": [player],
        "Seat": r.get_seat(player, splitted_hand),
        "PostedAnte": r.get_posted_ante(player, splitted_hand),
        "Position": r.get_position(player, splitted_hand),
        "PostedBlind": r.get_posted_blind(player, splitted_hand),
        "Stack": r.get_stack(player, splitted_hand),
        # Actions per street, selected by the section marker
        "PreflopAction": r.get_actions(player, splitted_hand, stage="HOLE CARDS ***"),
        "FlopAction": r.get_actions(player, splitted_hand, stage="FLOP ***"),
        "TurnAction": r.get_actions(player, splitted_hand, stage="TURN ***"),
        "RiverAction": r.get_actions(player, splitted_hand, stage="RIVER ***"),
        # All-in flags: the ante posting first, then one per street
        "AnteAllIn": r.get_allin(player, splitted_hand, stage=" posts the ante "),
        "PreflopAllIn": r.get_allin(player, splitted_hand, stage="HOLE CARDS ***"),
        "FlopAllIn": r.get_allin(player, splitted_hand, stage="FLOP ***"),
        "TurnAllIn": r.get_allin(player, splitted_hand, stage="TURN ***"),
        "RiverAllIn": r.get_allin(player, splitted_hand, stage="RIVER ***"),
        # Showdown and outcome
        "ShowDown": r.get_showed_card(player, splitted_hand),
        "CardCombination": r.get_card_combination(player, splitted_hand),
        "Result": r.get_result(player, splitted_hand),
        "Balance": r.get_balance(player, splitted_hand),
        "FinalRank": r.get_final_rank(player, splitted_hand),
        "Prize": r.get_prize(player, splitted_hand),
    }