pokerdf 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pokerdf/__init__.py +0 -0
- pokerdf/core/read_and_convert.py +254 -0
- pokerdf/main.py +79 -0
- pokerdf/regex/regex_execution.py +89 -0
- pokerdf/regex/regex_patterns.py +814 -0
- pokerdf/utils/strings.py +1 -0
- pokerdf/validation/pydantic_modules.py +42 -0
- pokerdf-1.0.0.dist-info/LICENSE +21 -0
- pokerdf-1.0.0.dist-info/METADATA +113 -0
- pokerdf-1.0.0.dist-info/RECORD +12 -0
- pokerdf-1.0.0.dist-info/WHEEL +4 -0
- pokerdf-1.0.0.dist-info/entry_points.txt +3 -0
pokerdf/__init__.py
ADDED
File without changes
|
@@ -0,0 +1,254 @@
|
|
1
|
+
import os
|
2
|
+
import pandas as pd
|
3
|
+
from typing import List
|
4
|
+
from joblib import Parallel, delayed
|
5
|
+
from pokerdf.validation.pydantic_modules import ValidateInput
|
6
|
+
from pokerdf.utils.strings import PLATFORM
|
7
|
+
from pokerdf.regex.regex_execution import (
|
8
|
+
capture_common_data,
|
9
|
+
capture_general_data_of_the_hand,
|
10
|
+
capture_specific_data_of_the_player,
|
11
|
+
r,
|
12
|
+
)
|
13
|
+
import warnings
|
14
|
+
|
15
|
+
warnings.simplefilter(action="ignore", category=FutureWarning)
|
16
|
+
|
17
|
+
|
18
|
+
def get_files_paths(path: str) -> List[str]:
    """
    Collect the hand history files found directly inside a directory.

    Only entries whose name starts with "HH" and ends with ".txt" are kept.
    Names are sorted before being filtered, so the returned paths follow the
    lexicographic order of the file names.

    Args:
        path (str): The directory to scan (sub-directories are not visited).

    Returns:
        List[str]: The directory joined with each selected file name.
    """
    paths = []

    for entry in sorted(os.listdir(path)):
        # Skip anything that is not named like a hand history export
        if not entry.startswith("HH"):
            continue
        if not entry.endswith(".txt"):
            continue

        paths.append(os.path.join(path, entry))

    return paths
|
45
|
+
|
46
|
+
|
47
|
+
def compose_dataframe() -> pd.DataFrame:
    """
    Build the empty table that parsed poker rows are appended to.

    Returns:
        pd.DataFrame: A DataFrame with no rows and the fixed set of columns,
            in the order listed below.
    """
    # The order of this tuple is the column order of the resulting table
    column_names = (
        "Modality", "TableSize", "BuyIn", "TournID", "TableID", "HandID",
        "LocalTime", "Level", "Ante", "Blinds",
        "Owner", "OwnersHand", "Playing",
        "Player", "Seat", "PostedAnte", "Position", "PostedBlind", "Stack",
        "PreflopAction", "FlopAction", "TurnAction", "RiverAction",
        "AnteAllIn", "PreflopAllIn", "FlopAllIn", "TurnAllIn", "RiverAllIn",
        "BoardFlop", "BoardTurn", "BoardRiver",
        "ShowDown", "CardCombination", "Result", "Balance",
        "FinalRank", "Prize",
    )

    return pd.DataFrame(data=None, columns=list(column_names))
|
98
|
+
|
99
|
+
|
100
|
+
def apply_regex(txt: str) -> pd.DataFrame:
    """
    Apply regex functions to parse the hand history text and collect relevant data.

    The text is split on the platform marker (``PLATFORM`` followed by a
    space) to isolate the hands, and each hand is split into stages on a
    newline followed by "*** ". Tournament-level data is captured once, from
    the first hand, and merged into every row together with the hand-level
    and player-level data. One row is produced per player per hand.

    Args:
        txt (str): The text content of the poker hand history file.

    Returns:
        pd.DataFrame: A DataFrame containing the parsed data from the hand
            history. Row labels are not renumbered; callers that need a
            clean index should reset it.

    Raises:
        ValueError: If no hand can be found in ``txt``.
    """
    stage_separator = "\n*** "

    # Splitting the tournament's hands in a list. Empty chunks are dropped, as
    # is a chunk made only of the byte order mark, which is what precedes the
    # first platform marker when the file starts with a BOM.
    list_of_hands_as_text = [
        hand for hand in txt.split(f"{PLATFORM} ") if hand and hand != "\ufeff"
    ]

    # Fail with an explicit message instead of an IndexError on [0] below
    if not list_of_hands_as_text:
        raise ValueError("No hands found in the hand history text.")

    # Capture common info about the tournament
    common = capture_common_data(list_of_hands_as_text[0].split(stage_separator))

    # The empty template goes first so its column order drives the result.
    # Frames are gathered and concatenated once: concatenating inside the
    # loop copies the whole accumulated table for every player row.
    frames = [compose_dataframe()]

    for hand in list_of_hands_as_text:

        # Split hand in stages (pre-flop/flop/turn/river)
        splited_hand = hand.split(stage_separator)

        # Capture general info of the hand
        general = capture_general_data_of_the_hand(splited_hand)

        # Iterate over players
        for player in r.get_players(splited_hand):

            # Capture specific info of players' actions
            specific = capture_specific_data_of_the_player(splited_hand, player)

            # Combine collected info
            collected_data = {**common, **general, **specific}

            # Validate (the instance is discarded; presumably this raises on
            # invalid data -- confirm against ValidateInput)
            ValidateInput(**collected_data)

            # Convert to dataframe
            frames.append(pd.DataFrame(collected_data))

    return pd.concat(frames)
|
156
|
+
|
157
|
+
|
158
|
+
def convert_txt_to_tabular_data(path: str) -> pd.DataFrame:
    """
    Read a hand history text file and parse it into a DataFrame.

    The file is decoded as UTF-8; bytes that cannot be decoded are replaced
    instead of raising an error.

    Args:
        path (str): The path to the .txt file containing the hand history.

    Returns:
        pd.DataFrame: The table produced by ``apply_regex`` for the file's text.
    """
    with open(path, mode="r", encoding="utf-8", errors="replace") as handle:
        raw_text = handle.read()
        return apply_regex(raw_text)
|
173
|
+
|
174
|
+
|
175
|
+
def _save_log(msg: str, destination: str, file_name: str) -> None:
    """
    Append a log message, followed by a newline, to a file.

    The file is created if it does not exist yet.

    Args:
        msg (str): The message to be logged.
        destination (str): The folder where the log file will be saved.
        file_name (str): The name of the log file.
    """
    # Compose path of the log
    path = os.path.join(destination, file_name)

    # The context manager closes the file even if the write fails, and the
    # explicit encoding keeps messages with non-ASCII characters (file names,
    # error texts) writable whatever the platform default is.
    with open(path, "a", encoding="utf-8") as file:
        file.write(msg + "\n")
|
195
|
+
|
196
|
+
|
197
|
+
class DataProcessing:
    """
    Process and save a poker hand history file, logging the result.

    Args:
        path (str): The path to the hand history file.
        destination (str): The directory where the processed data will be saved.
    """

    def __init__(self, path: str, destination: str) -> None:
        self.path = path
        self.destination = destination

    def run(self) -> None:
        """
        Trigger the data processing.

        The file is converted to a table and written to the destination as
        "<first 8 characters of LocalTime without dashes>-T<TournID>.parquet",
        both values taken from the first row. On success a " DONE: " line is
        appended to "success.txt" in the destination and printed.

        Any exception raised along the way is caught: a " FAIL: " line with
        the error text is appended to "fail.txt" and printed instead, so one
        bad file does not stop the others.
        """
        # os.path.basename honours the platform's separator, whereas
        # splitting on "/" returns the whole path on Windows-style paths.
        source_name = os.path.basename(self.path)

        try:

            # Convert text to pd.DataFrame
            df = convert_txt_to_tabular_data(self.path).reset_index(drop=True)

            # Without rows there is nothing to name the output after; fail
            # with a readable reason rather than a bare lookup error.
            if df.empty:
                raise ValueError("no rows were parsed from the file")

            # Compose name of the .parquet file (the Tournament ID + the Local Time)
            clean_datetime = str(df.LocalTime[0]).replace("-", "")[:8]
            file_name = clean_datetime + "-T" + str(df.TournID[0]) + ".parquet"

            # Path to save the file
            destination_path = os.path.join(self.destination, file_name)

            # Save the table
            df.to_parquet(destination_path, index=False)

            # Log / print DONE status
            msg = " DONE: " + source_name
            _save_log(msg, self.destination, "success.txt")
            print(msg)

        except Exception as e:

            # Log / print FAIL status
            msg = " FAIL: " + source_name
            msg += " (" + str(e) + ")"
            _save_log(msg, self.destination, "fail.txt")
            print(msg)
|
241
|
+
|
242
|
+
|
243
|
+
def execute_in_parallel(source: str, destination: str) -> None:
    """
    Run one DataProcessing job per hand history file, spread over joblib workers.

    Args:
        source (str): The directory containing the hand history files.
        destination (str): The directory handed to every DataProcessing job.
    """
    hand_history_paths = get_files_paths(source)

    # One delayed call to DataProcessing.run for each file found
    jobs = (
        delayed(DataProcessing(file_path, destination).run)()
        for file_path in hand_history_paths
    )

    # n_jobs=-1 asks joblib to use every available core
    Parallel(n_jobs=-1)(jobs)
|
pokerdf/main.py
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
import os
|
2
|
+
import sys
|
3
|
+
import datetime
|
4
|
+
|
5
|
+
from pokerdf.core.read_and_convert import execute_in_parallel
|
6
|
+
|
7
|
+
|
8
|
+
def main() -> None:
    """
    Main function to process command line arguments and execute the 'convert' command.

    - Checks if the command is 'convert'.
    - Checks that the source path is an existing, non-empty directory holding
      at least one hand history file (named "HH*.txt", the only files the
      conversion pipeline picks up).
    - Generates a session ID and creates the destination "./output/<session ID>".
    - Executes the pipeline function `execute_in_parallel` to process files
      from source to destination, then prints the elapsed time.

    Raises:
        SystemExit: With code 1 if there are not enough arguments, if an
            invalid command is provided, or if the source path fails one of
            the checks above.
        FileExistsError: If the destination folder already exists (two runs
            started within the same second).
    """

    if len(sys.argv) < 3:
        print("Usage: pokerdf convert <path>")
        sys.exit(1)

    command = sys.argv[1]
    source_path = sys.argv[2]

    # Unknown commands are an error: report it through the exit status too
    if command != "convert":
        print(f"The command '{command}' does not exist.")
        sys.exit(1)

    # Check if the source path exists
    if not os.path.exists(source_path):
        print(f"The source path '{source_path}' does not exist.")
        sys.exit(1)

    # Check if the source path is a directory
    if not os.path.isdir(source_path):
        print(f"The source path '{source_path}' is not a directory.")
        sys.exit(1)

    # List the directory once and reuse the listing for both checks below
    file_names = os.listdir(source_path)

    # Check if the source path is empty
    if not file_names:
        print(f"The source path '{source_path}' is empty.")
        sys.exit(1)

    # Check if the source path holds at least one poker hand history file.
    # The test mirrors the selection made by the pipeline ("HH" prefix and
    # ".txt" suffix) so a folder of unrelated .txt files is rejected here
    # instead of producing an empty output folder.
    if not any(
        name.startswith("HH") and name.endswith(".txt") for name in file_names
    ):
        print(
            f"The source path '{source_path}' does not contain any poker hand history files."
        )
        sys.exit(1)

    # Get start time
    start_time = datetime.datetime.now()

    # Generate session ID
    session_id = start_time.strftime("%Y%m%d-%H%M%S")

    # Generate destination path
    destination_path = f"./output/{session_id}"

    # Create folder
    os.makedirs(destination_path)

    # Execute pipeline
    execute_in_parallel(source=source_path, destination=destination_path)

    # Get the completed time in hours, minutes, and seconds
    elapsed_time = datetime.datetime.now() - start_time
    hours, remainder = divmod(elapsed_time.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)

    # Print the completed time in a readable format
    print(
        f"Processing completed in {int(hours)} hours, {int(minutes)} minutes, and {int(seconds)} seconds."
    )
|
76
|
+
|
77
|
+
|
78
|
+
if __name__ == "__main__":
|
79
|
+
main()
|
@@ -0,0 +1,89 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from pokerdf.regex.regex_patterns import RegexPatterns
|
3
|
+
|
4
|
+
r = RegexPatterns()
|
5
|
+
|
6
|
+
|
7
|
+
def capture_common_data(hand: list[str]) -> dict[str, Any]:
    """
    Extract the values that are captured once per tournament.

    Args:
        hand (list): List of texts from a specific hand.

    Returns:
        dict: The captured values under the keys "Modality", "TableSize",
            "BuyIn", "TournID" and "Owner".
    """
    return {
        "Modality": r.get_modality(hand),
        "TableSize": r.get_table_size(hand),
        "BuyIn": r.get_buyin(hand),
        "TournID": r.get_tourn_id(hand),
        "Owner": r.get_owner(hand),
    }
|
25
|
+
|
26
|
+
|
27
|
+
def capture_general_data_of_the_hand(splitted_hand: list[str]) -> dict[str, Any]:
    """
    Extract the values that describe a hand as a whole.

    Args:
        splitted_hand (list): List of texts from a specific hand.

    Returns:
        dict: The captured values, keyed by column name ("HandID", "TableID",
            "LocalTime", "Level", "Ante", "Blinds", "OwnersHand", "Playing"
            and the three board columns).
    """
    return {
        "HandID": r.get_hand_id(splitted_hand),
        "TableID": r.get_table_id(splitted_hand),
        "LocalTime": r.get_time(splitted_hand),
        "Level": r.get_level(splitted_hand),
        "Ante": r.get_ante(splitted_hand),
        "Blinds": r.get_blinds(splitted_hand),
        "OwnersHand": r.get_owner_cards(splitted_hand),
        "Playing": r.get_number_of_active_players(splitted_hand),
        # Board cards, looked up by the marker of each stage
        "BoardFlop": r.get_board(splitted_hand, stage="FLOP ***"),
        "BoardTurn": r.get_board(splitted_hand, stage="TURN ***"),
        "BoardRiver": r.get_board(splitted_hand, stage="RIVER ***"),
    }
|
51
|
+
|
52
|
+
|
53
|
+
def capture_specific_data_of_the_player(
    splitted_hand: list[str], player: str
) -> dict[str, Any]:
    """
    Extract the values that describe one player within one hand.

    Args:
        splitted_hand (list): List of texts from a specific hand.
        player (str): Name of the player.

    Returns:
        dict: The captured values, keyed by column name. "Player" holds the
            name wrapped in a one-element list.
    """
    # Markers passed as `stage` to select the part of the hand to look at
    ante = " posts the ante "
    preflop = "HOLE CARDS ***"
    flop = "FLOP ***"
    turn = "TURN ***"
    river = "RIVER ***"

    return {
        "Player": [player],
        "Seat": r.get_seat(player, splitted_hand),
        "PostedAnte": r.get_posted_ante(player, splitted_hand),
        "Position": r.get_position(player, splitted_hand),
        "PostedBlind": r.get_posted_blind(player, splitted_hand),
        "Stack": r.get_stack(player, splitted_hand),
        # Actions per stage
        "PreflopAction": r.get_actions(player, splitted_hand, stage=preflop),
        "FlopAction": r.get_actions(player, splitted_hand, stage=flop),
        "TurnAction": r.get_actions(player, splitted_hand, stage=turn),
        "RiverAction": r.get_actions(player, splitted_hand, stage=river),
        # All-in flags per stage
        "AnteAllIn": r.get_allin(player, splitted_hand, stage=ante),
        "PreflopAllIn": r.get_allin(player, splitted_hand, stage=preflop),
        "FlopAllIn": r.get_allin(player, splitted_hand, stage=flop),
        "TurnAllIn": r.get_allin(player, splitted_hand, stage=turn),
        "RiverAllIn": r.get_allin(player, splitted_hand, stage=river),
        # Outcome of the hand for this player
        "ShowDown": r.get_showed_card(player, splitted_hand),
        "CardCombination": r.get_card_combination(player, splitted_hand),
        "Result": r.get_result(player, splitted_hand),
        "Balance": r.get_balance(player, splitted_hand),
        "FinalRank": r.get_final_rank(player, splitted_hand),
        "Prize": r.get_prize(player, splitted_hand),
    }
|