pokerdf 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pokerdf-1.0.0/LICENSE +21 -0
- pokerdf-1.0.0/PKG-INFO +113 -0
- pokerdf-1.0.0/README.md +92 -0
- pokerdf-1.0.0/pokerdf/__init__.py +0 -0
- pokerdf-1.0.0/pokerdf/core/read_and_convert.py +254 -0
- pokerdf-1.0.0/pokerdf/main.py +79 -0
- pokerdf-1.0.0/pokerdf/regex/regex_execution.py +89 -0
- pokerdf-1.0.0/pokerdf/regex/regex_patterns.py +814 -0
- pokerdf-1.0.0/pokerdf/utils/strings.py +1 -0
- pokerdf-1.0.0/pokerdf/validation/pydantic_modules.py +42 -0
- pokerdf-1.0.0/pyproject.toml +42 -0
pokerdf-1.0.0/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2025 Murilo Amaral
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
pokerdf-1.0.0/PKG-INFO
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
Metadata-Version: 2.3
|
2
|
+
Name: pokerdf
|
3
|
+
Version: 1.0.0
|
4
|
+
Summary: Converts poker hand history files to pandas DataFrames.
|
5
|
+
Author: Murilo Amaral
|
6
|
+
Author-email: murilogmamaral@gmail.com
|
7
|
+
Requires-Python: >=3.11.5
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
9
|
+
Classifier: Programming Language :: Python :: 3.12
|
10
|
+
Classifier: Programming Language :: Python :: 3.13
|
11
|
+
Requires-Dist: joblib (==1.3.2)
|
12
|
+
Requires-Dist: mypy (>=0.0.0)
|
13
|
+
Requires-Dist: pandas (>=2.0.0)
|
14
|
+
Requires-Dist: pyarrow (>=16.1.0)
|
15
|
+
Requires-Dist: pydantic (>=2.8.0)
|
16
|
+
Requires-Dist: ruff (>=0.0.0)
|
17
|
+
Project-URL: homepage, https://gitlab.com/murilogmamaral/pokerdf
|
18
|
+
Project-URL: repository, https://gitlab.com/murilogmamaral/pokerdf
|
19
|
+
Description-Content-Type: text/markdown
|
20
|
+
|
21
|
+
# PokerDF
|
22
|
+
|
23
|
+
Converts poker hand history files into structured Pandas DataFrames, making it easier to analyze your games.
|
24
|
+
|
25
|
+
Fast and reliable, PokerDF can convert about 3,000 hand history files per minute into _.parquet_ files on a MacBook Air M2 with an 8-core CPU.
|
26
|
+
|
27
|
+
Currently supports PokerStars. Make sure hand histories are saved in English.
|
28
|
+
|
29
|
+
## Introduction
|
30
|
+
|
31
|
+
Converting raw hand histories into structured data is the first step toward building a solid poker strategy and maximizing ROI. What are the optimal VPIP, PFR, and C-BET frequencies for No Limit Hold'em 6-Max? In which specific situations is a 3-Bet most profitable? When is bluffing a clear mistake? Once your data is organized in a Pandas DataFrame, the analytical explorations become unlimited, opening new possibilities to fine-tune your decision-making.
|
32
|
+
|
33
|
+
## Installation
|
34
|
+
```
|
35
|
+
pip install pokerdf
|
36
|
+
```
|
37
|
+
|
38
|
+
## Usage
|
39
|
+
Navigate to the folder where you want to save the output:
|
40
|
+
```
|
41
|
+
cd output_directory
|
42
|
+
```
|
43
|
+
Then, run the package like this:
|
44
|
+
```
|
45
|
+
pokerdf convert /path/to/handhistory/folder
|
46
|
+
```
|
47
|
+
|
48
|
+
Once the process is concluded, you will find something like this:
|
49
|
+
```
|
50
|
+
output_directory/
|
51
|
+
└── output/
|
52
|
+
└── 20250510-105423/
|
53
|
+
├── 20200607-T2928873630.parquet
|
54
|
+
├── 20200607-T2928880893.parquet
|
55
|
+
├── 20200607-T2928925240.parquet
|
56
|
+
├── 20200607-T2928950825.parquet
|
57
|
+
├── 20200607-T2928996127.parquet
|
58
|
+
├── 20200607-T2929005994.parquet
|
59
|
+
├── ...
|
60
|
+
├── fail.txt
|
61
|
+
└── success.txt
|
62
|
+
```
|
63
|
+
#### Details
|
64
|
+
1. Inside `output` you’ll find a subfolder named with the session ID, in this case, `20250510-105423`, containing all _.parquet_ files.
|
65
|
+
2. Each hand history file is converted into a _.parquet_ file with the exact same structure, allowing you to concatenate them seamlessly.
|
66
|
+
3. Each _.parquet_ file follows the naming convention _{DATE_OF_TOURNAMENT}-T{TOURNAMENT_ID}.parquet_.
|
67
|
+
4. The file `fail.txt` provides detailed information about any files that failed to process. This file is only generated if there are failures.
|
68
|
+
5. The file `success.txt` lists all successfully converted files.
|
69
|
+
|
70
|
+
## DataFrame structure
|
71
|
+
| Column | Description | Example | Data Type |
|
72
|
+
|-------------------|--------------------------------------------------------------|-----------------------------------|-----------------|
|
73
|
+
| Modality | The type of game being played | Hold'em No Limit | string |
|
74
|
+
| TableSize | Maximum number of players | 6 | int |
|
75
|
+
| BuyIn | The buy-in amount for the tournament | $4.60+$0.40 | string |
|
76
|
+
| TournID | Unique identifier for the tournament | 2928882649 | string |
|
77
|
+
| TableID | Unique identifier for the table inside a tournament | 10 | int |
|
78
|
+
| HandID | Unique identifier for the hand inside a tournament | 215024616736 | string |
|
79
|
+
| LocalTime | Local time when the hand was played | 2020-06-07 07:44:35 | datetime |
|
80
|
+
| Level | Level of the tournament | IV | string |
|
81
|
+
| Ante | Ante amount posted in the hand | 10.00 | float |
|
82
|
+
| Blinds | Small blind and big blind amounts | [10.0, 20.0] | list[float] |
|
83
|
+
| Owner | Owner of the hand history files | ownername | string |
|
84
|
+
| OwnersHand | Cards held by the owner in a specific hand | [9d, Js] | list[string] |
|
85
|
+
| Playing | Number of players active during the hand | 5 | int |
|
86
|
+
| Player | Player involved in the hand | playername | string |
|
87
|
+
| Seat | Seat number of the player | 3 | int |
|
88
|
+
| PostedAnte | Amount the player paid for the ante | 5.00 | float |
|
89
|
+
| PostedBlind | Amount the player paid for the blinds | 50.00 | float |
|
90
|
+
| Position | Player's position at the table | big blind | string |
|
91
|
+
| Stack | Current stack size of the player | 2500.00 | float |
|
92
|
+
| PreflopAction | Actions taken during the preflop stage | [[checks, ]] | list[list[str]] |
|
93
|
+
| FlopAction | Actions taken during the flop stage | [[bets, 840], [calls, 220]] | list[list[str]] |
|
94
|
+
| TurnAction | Actions taken during the turn stage | [[raises, 400], [calls, 500]] | list[list[str]] |
|
95
|
+
| RiverAction | Actions taken during the river stage | [[folds, ]] | list[list[str]] |
|
96
|
+
| AnteAllIn | Whether the player went all-in during the ante | True | bool |
|
97
|
+
| PreflopAllIn | Whether the player went all-in during preflop | False | bool |
|
98
|
+
| FlopAllIn | Whether the player went all-in during the flop | False | bool |
|
99
|
+
| TurnAllIn | Whether the player went all-in during the turn | False | bool |
|
100
|
+
| RiverAllIn | Whether the player went all-in during the river | False | bool |
|
101
|
+
| BoardFlop | Cards dealt on the flop | [4d, Qs, Ad] | list[string] |
|
102
|
+
| BoardTurn | Card dealt on the turn | [4d, Qs, Ad, 7d] | list[string] |
|
103
|
+
| BoardRiver | Card dealt on the river | [4d, Qs, Ad, 7d, 2d] | list[string] |
|
104
|
+
| ShowDown | Player's cards, if the hand went to showdown | [Ah, Ac] | list[string] |
|
105
|
+
| CardCombination | Card combination held by the player | three of a kind, Aces | string |
|
106
|
+
| Result | Result of the hand (folded, lost, mucked, non-sd win, won) | won | string |
|
107
|
+
| Balance | Total value won in a hand | 9150.25 | float |
|
108
|
+
| FinalRank | Player's final ranking in the tournament | 1 | int |
|
109
|
+
| Prize | Prize won by the player, if any | 30000.00 | float |
|
110
|
+
|
111
|
+
## License
|
112
|
+
MIT License
|
113
|
+
|
pokerdf-1.0.0/README.md
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
# PokerDF
|
2
|
+
|
3
|
+
Converts poker hand history files into structured Pandas DataFrames, making it easier to analyze your games.
|
4
|
+
|
5
|
+
Fast and reliable, PokerDF can convert about 3,000 hand history files per minute into _.parquet_ files on a MacBook Air M2 with an 8-core CPU.
|
6
|
+
|
7
|
+
Currently supports PokerStars. Make sure hand histories are saved in English.
|
8
|
+
|
9
|
+
## Introduction
|
10
|
+
|
11
|
+
Converting raw hand histories into structured data is the first step toward building a solid poker strategy and maximizing ROI. What are the optimal VPIP, PFR, and C-BET frequencies for No Limit Hold'em 6-Max? In which specific situations is a 3-Bet most profitable? When is bluffing a clear mistake? Once your data is organized in a Pandas DataFrame, the analytical explorations become unlimited, opening new possibilities to fine-tune your decision-making.
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
```
|
15
|
+
pip install pokerdf
|
16
|
+
```
|
17
|
+
|
18
|
+
## Usage
|
19
|
+
Navigate to the folder where you want to save the output:
|
20
|
+
```
|
21
|
+
cd output_directory
|
22
|
+
```
|
23
|
+
Then, run the package like this:
|
24
|
+
```
|
25
|
+
pokerdf convert /path/to/handhistory/folder
|
26
|
+
```
|
27
|
+
|
28
|
+
Once the process is concluded, you will find something like this:
|
29
|
+
```
|
30
|
+
output_directory/
|
31
|
+
└── output/
|
32
|
+
└── 20250510-105423/
|
33
|
+
├── 20200607-T2928873630.parquet
|
34
|
+
├── 20200607-T2928880893.parquet
|
35
|
+
├── 20200607-T2928925240.parquet
|
36
|
+
├── 20200607-T2928950825.parquet
|
37
|
+
├── 20200607-T2928996127.parquet
|
38
|
+
├── 20200607-T2929005994.parquet
|
39
|
+
├── ...
|
40
|
+
├── fail.txt
|
41
|
+
└── success.txt
|
42
|
+
```
|
43
|
+
#### Details
|
44
|
+
1. Inside `output` you’ll find a subfolder named with the session ID, in this case, `20250510-105423`, containing all _.parquet_ files.
|
45
|
+
2. Each hand history file is converted into a _.parquet_ file with the exact same structure, allowing you to concatenate them seamlessly.
|
46
|
+
3. Each _.parquet_ file follows the naming convention _{DATE_OF_TOURNAMENT}-T{TOURNAMENT_ID}.parquet_.
|
47
|
+
4. The file `fail.txt` provides detailed information about any files that failed to process. This file is only generated if there are failures.
|
48
|
+
5. The file `success.txt` lists all successfully converted files.
|
49
|
+
|
50
|
+
## DataFrame structure
|
51
|
+
| Column | Description | Example | Data Type |
|
52
|
+
|-------------------|--------------------------------------------------------------|-----------------------------------|-----------------|
|
53
|
+
| Modality | The type of game being played | Hold'em No Limit | string |
|
54
|
+
| TableSize | Maximum number of players | 6 | int |
|
55
|
+
| BuyIn | The buy-in amount for the tournament | $4.60+$0.40 | string |
|
56
|
+
| TournID | Unique identifier for the tournament | 2928882649 | string |
|
57
|
+
| TableID | Unique identifier for the table inside a tournament | 10 | int |
|
58
|
+
| HandID | Unique identifier for the hand inside a tournament | 215024616736 | string |
|
59
|
+
| LocalTime | Local time when the hand was played | 2020-06-07 07:44:35 | datetime |
|
60
|
+
| Level | Level of the tournament | IV | string |
|
61
|
+
| Ante | Ante amount posted in the hand | 10.00 | float |
|
62
|
+
| Blinds | Small blind and big blind amounts | [10.0, 20.0] | list[float] |
|
63
|
+
| Owner | Owner of the hand history files | ownername | string |
|
64
|
+
| OwnersHand | Cards held by the owner in a specific hand | [9d, Js] | list[string] |
|
65
|
+
| Playing | Number of players active during the hand | 5 | int |
|
66
|
+
| Player | Player involved in the hand | playername | string |
|
67
|
+
| Seat | Seat number of the player | 3 | int |
|
68
|
+
| PostedAnte | Amount the player paid for the ante | 5.00 | float |
|
69
|
+
| PostedBlind | Amount the player paid for the blinds | 50.00 | float |
|
70
|
+
| Position | Player's position at the table | big blind | string |
|
71
|
+
| Stack | Current stack size of the player | 2500.00 | float |
|
72
|
+
| PreflopAction | Actions taken during the preflop stage | [[checks, ]] | list[list[str]] |
|
73
|
+
| FlopAction | Actions taken during the flop stage | [[bets, 840], [calls, 220]] | list[list[str]] |
|
74
|
+
| TurnAction | Actions taken during the turn stage | [[raises, 400], [calls, 500]] | list[list[str]] |
|
75
|
+
| RiverAction | Actions taken during the river stage | [[folds, ]] | list[list[str]] |
|
76
|
+
| AnteAllIn | Whether the player went all-in during the ante | True | bool |
|
77
|
+
| PreflopAllIn | Whether the player went all-in during preflop | False | bool |
|
78
|
+
| FlopAllIn | Whether the player went all-in during the flop | False | bool |
|
79
|
+
| TurnAllIn | Whether the player went all-in during the turn | False | bool |
|
80
|
+
| RiverAllIn | Whether the player went all-in during the river | False | bool |
|
81
|
+
| BoardFlop | Cards dealt on the flop | [4d, Qs, Ad] | list[string] |
|
82
|
+
| BoardTurn | Card dealt on the turn | [4d, Qs, Ad, 7d] | list[string] |
|
83
|
+
| BoardRiver | Card dealt on the river | [4d, Qs, Ad, 7d, 2d] | list[string] |
|
84
|
+
| ShowDown | Player's cards, if the hand went to showdown | [Ah, Ac] | list[string] |
|
85
|
+
| CardCombination | Card combination held by the player | three of a kind, Aces | string |
|
86
|
+
| Result | Result of the hand (folded, lost, mucked, non-sd win, won) | won | string |
|
87
|
+
| Balance | Total value won in a hand | 9150.25 | float |
|
88
|
+
| FinalRank | Player's final ranking in the tournament | 1 | int |
|
89
|
+
| Prize | Prize won by the player, if any | 30000.00 | float |
|
90
|
+
|
91
|
+
## License
|
92
|
+
MIT License
|
File without changes
|
@@ -0,0 +1,254 @@
|
|
1
|
+
import os
|
2
|
+
import pandas as pd
|
3
|
+
from typing import List
|
4
|
+
from joblib import Parallel, delayed
|
5
|
+
from pokerdf.validation.pydantic_modules import ValidateInput
|
6
|
+
from pokerdf.utils.strings import PLATFORM
|
7
|
+
from pokerdf.regex.regex_execution import (
|
8
|
+
capture_common_data,
|
9
|
+
capture_general_data_of_the_hand,
|
10
|
+
capture_specific_data_of_the_player,
|
11
|
+
r,
|
12
|
+
)
|
13
|
+
import warnings
|
14
|
+
|
15
|
+
warnings.simplefilter(action="ignore", category=FutureWarning)
|
16
|
+
|
17
|
+
|
18
|
+
def get_files_paths(path: str) -> List[str]:
    """
    List the hand history files found directly inside a directory.

    Only entries whose name starts with "HH" and ends with ".txt" are kept.
    Names are sorted before being joined with the directory, so the returned
    paths follow sorted file-name order.

    Args:
        path (str): Directory to scan (not searched recursively).

    Returns:
        List[str]: ``path`` joined with each selected file name.
    """
    return [
        os.path.join(path, entry)
        for entry in sorted(os.listdir(path))
        if entry.startswith("HH") and entry.endswith(".txt")
    ]
|
45
|
+
|
46
|
+
|
47
|
+
def compose_dataframe() -> pd.DataFrame:
    """
    Build the empty template DataFrame used to accumulate parsed poker data.

    Returns:
        pd.DataFrame: A DataFrame with no rows and the predefined columns,
        in the order listed below.
    """
    # Column order defined here is the column order of the template frame.
    column_names = [
        "Modality",
        "TableSize",
        "BuyIn",
        "TournID",
        "TableID",
        "HandID",
        "LocalTime",
        "Level",
        "Ante",
        "Blinds",
        "Owner",
        "OwnersHand",
        "Playing",
        "Player",
        "Seat",
        "PostedAnte",
        "Position",
        "PostedBlind",
        "Stack",
        "PreflopAction",
        "FlopAction",
        "TurnAction",
        "RiverAction",
        "AnteAllIn",
        "PreflopAllIn",
        "FlopAllIn",
        "TurnAllIn",
        "RiverAllIn",
        "BoardFlop",
        "BoardTurn",
        "BoardRiver",
        "ShowDown",
        "CardCombination",
        "Result",
        "Balance",
        "FinalRank",
        "Prize",
    ]
    template = pd.DataFrame(data=None, columns=column_names)
    return template
|
98
|
+
|
99
|
+
|
100
|
+
def apply_regex(txt: str) -> pd.DataFrame:
    """
    Apply regex functions to parse the hand history text and collect relevant data.

    The text is split into hands, each hand is split into stages, and one row
    is produced per (hand, player) pair. Every row is validated with
    ``ValidateInput`` before being added to the result.

    Args:
        txt (str): The text content of the poker hand history file.

    Returns:
        pd.DataFrame: A DataFrame containing the parsed data from the hand
        history. The index is not reset, so row labels repeat.

    Raises:
        ValueError: If ``txt`` contains no hands after cleaning.
    """
    # Split the tournament into hands on the platform marker.
    # NOTE(review): assumes every hand header starts with f"{PLATFORM} " — confirm
    # against pokerdf.utils.strings.
    byte_order_mark = "\ufeff"

    # Drop empty fragments and a fragment that is only the byte-order mark
    # (what precedes the first hand when the file starts with a BOM).
    list_of_hands_as_text = [
        hand
        for hand in txt.split(f"{PLATFORM} ")
        if hand and hand != byte_order_mark
    ]

    # Fail with an explicit message instead of an IndexError on [0] below.
    if not list_of_hands_as_text:
        raise ValueError("No hands found in the hand history text.")

    # Capture common info about the tournament from the first hand
    common = capture_common_data(list_of_hands_as_text[0].split("\n*** "))

    # Collect all frames and concatenate once at the end; concatenating inside
    # the loop re-copies the accumulated frame for every player row.
    # The empty template goes first so the predefined column order is kept.
    frames = [compose_dataframe()]

    for hand in list_of_hands_as_text:

        # Split hand in stages (pre-flop/flop/turn/river)
        splited_hand = hand.split("\n*** ")

        # Capture general info of the hand
        general = capture_general_data_of_the_hand(splited_hand)

        # Iterate over the players of the hand
        for player in r.get_players(splited_hand):

            # Capture specific info of the player's actions
            specific = capture_specific_data_of_the_player(splited_hand, player)

            # Combine collected info
            collected_data = {**common, **general, **specific}

            # Validate (raises if the collected values do not match the model)
            ValidateInput(**collected_data)

            # Convert to dataframe and keep it for the final concatenation
            frames.append(pd.DataFrame(collected_data))

    return pd.concat(frames)
|
156
|
+
|
157
|
+
|
158
|
+
def convert_txt_to_tabular_data(path: str) -> pd.DataFrame:
    """
    Read a hand history text file and parse it into a DataFrame.

    The file is decoded as UTF-8; undecodable bytes are replaced rather than
    raising an error.

    Args:
        path (str): The path to the .txt file containing the hand history.

    Returns:
        pd.DataFrame: The output of ``apply_regex`` for the file's content.
    """
    with open(path, "r", encoding="utf-8", errors="replace") as handle:
        content = handle.read()

    return apply_regex(content)
|
173
|
+
|
174
|
+
|
175
|
+
def _save_log(msg: str, destination: str, file_name: str) -> None:
    """
    Append a log message, followed by a newline, to a file.

    The file is created if it does not exist yet.

    Args:
        msg (str): The message to be logged.
        destination (str): The folder where the log file will be saved.
        file_name (str): The name of the log file.
    """
    # Compose path of the log
    path = os.path.join(destination, file_name)

    # The context manager closes the file even if the write fails. UTF-8 is set
    # explicitly so messages with non-ASCII file names or error text do not
    # depend on the platform's default encoding.
    with open(path, "a", encoding="utf-8") as log_file:
        log_file.write(msg + "\n")
|
195
|
+
|
196
|
+
|
197
|
+
class DataProcessing:
    """
    Process and save a poker hand history file, logging the result.

    Args:
        path (str): The path to the hand history file.
        destination (str): The directory where the processed data will be saved.
    """

    def __init__(self, path: str, destination: str) -> None:
        self.path = path
        self.destination = destination

    def run(self) -> None:
        """
        Trigger the data processing.

        Converts the file at ``self.path`` to a DataFrame and writes it as a
        .parquet file inside ``self.destination``. On success a line is appended
        to "success.txt"; on any exception a line with the error text is
        appended to "fail.txt". The outcome is also printed. Exceptions raised
        while converting or saving are not propagated.
        """
        # os.path.basename handles the platform's separator, unlike splitting
        # on "/", which leaves the whole path in the log on Windows.
        source_name = os.path.basename(self.path)

        try:

            # Convert text to pd.DataFrame
            df = convert_txt_to_tabular_data(self.path).reset_index(drop=True)

            # Compose name of the .parquet file: first 8 characters of the first
            # row's LocalTime with dashes removed, then "-T" + Tournament ID
            clean_datetime = str(df.LocalTime[0]).replace("-", "")[:8]
            file_name = clean_datetime + "-T" + str(df.TournID[0]) + ".parquet"

            # Path to save the file
            destination_path = os.path.join(self.destination, file_name)

            # Save the table
            df.to_parquet(destination_path, index=False)

            # Log / print DONE status
            msg = " DONE: " + source_name
            _save_log(msg, self.destination, "success.txt")
            print(msg)

        except Exception as e:
            # Deliberate catch-all: one bad file must not stop the batch; the
            # failure is recorded instead.

            # Log / print FAIL status
            msg = " FAIL: " + source_name
            msg += " (" + str(e) + ")"
            _save_log(msg, self.destination, "fail.txt")
            print(msg)
|
241
|
+
|
242
|
+
|
243
|
+
def execute_in_parallel(source: str, destination: str) -> None:
    """
    Run one ``DataProcessing`` job per hand history file using joblib.

    Args:
        source (str): Directory passed to ``get_files_paths`` to find the files.
        destination (str): Directory handed to every ``DataProcessing`` job.
    """
    # Collect the files to process
    hand_history_paths = get_files_paths(source)

    # One delayed call to DataProcessing.run per file
    jobs = (
        delayed(DataProcessing(file_path, destination).run)()
        for file_path in hand_history_paths
    )

    # n_jobs=-1 is joblib's setting for using all available CPUs
    worker_pool = Parallel(n_jobs=-1)
    worker_pool(jobs)
|
@@ -0,0 +1,79 @@
|
|
1
|
+
import os
|
2
|
+
import sys
|
3
|
+
import datetime
|
4
|
+
|
5
|
+
from pokerdf.core.read_and_convert import execute_in_parallel
|
6
|
+
|
7
|
+
|
8
|
+
def main() -> None:
    """
    Main function to process command line arguments and execute the 'convert' command.

    - Expects ``sys.argv`` to be ``[program, command, source_path]``.
    - Checks if the command is 'convert'.
    - Validates the source path, generates a session ID and creates the
      destination folder ``./output/<session_id>``.
    - Executes the pipeline function `execute_in_parallel` to process files from
      source to destination, then prints the elapsed time.

    Raises:
        SystemExit: With status 1 if there are not enough arguments, if an
            invalid command is provided, or if the source path does not exist,
            is not a directory, is empty, or contains no ".txt" file.
    """

    if len(sys.argv) < 3:
        print("Usage: pokerdf convert <path>")
        sys.exit(1)

    command = sys.argv[1]
    source_path = sys.argv[2]

    # Unknown commands exit with a non-zero status so that shells and scripts
    # can detect the failure (previously the message was printed with status 0).
    if command != "convert":
        print(f"The command '{command}' does not exist.")
        sys.exit(1)

    # Check if the source path exists
    if not os.path.exists(source_path):
        print(f"The source path '{source_path}' does not exist.")
        sys.exit(1)

    # Check if the source path is a directory
    if not os.path.isdir(source_path):
        print(f"The source path '{source_path}' is not a directory.")
        sys.exit(1)

    # List the directory once and reuse the result for both checks below
    entries = os.listdir(source_path)

    # Check if the source path is empty
    if not entries:
        print(f"The source path '{source_path}' is empty.")
        sys.exit(1)

    # Check if the source path contains at least one candidate .txt file
    if not any(entry.endswith(".txt") for entry in entries):
        print(
            f"The source path '{source_path}' does not contain any poker hand history files."
        )
        sys.exit(1)

    # Get start time
    start_time = datetime.datetime.now()

    # Generate session ID from the start time
    session_id = start_time.strftime("%Y%m%d-%H%M%S")

    # Generate destination path (relative to the current working directory)
    destination_path = f"./output/{session_id}"

    # Create folder
    os.makedirs(destination_path)

    # Execute pipeline
    execute_in_parallel(source=source_path, destination=destination_path)

    # Get the completed time in hours, minutes, and seconds
    elapsed_time = datetime.datetime.now() - start_time
    hours, remainder = divmod(elapsed_time.total_seconds(), 3600)
    minutes, seconds = divmod(remainder, 60)

    # Print the completed time in a readable format
    print(
        f"Processing completed in {int(hours)} hours, {int(minutes)} minutes, and {int(seconds)} seconds."
    )
|
76
|
+
|
77
|
+
|
78
|
+
if __name__ == "__main__":
|
79
|
+
main()
|
@@ -0,0 +1,89 @@
|
|
1
|
+
from typing import Any
|
2
|
+
from pokerdf.regex.regex_patterns import RegexPatterns
|
3
|
+
|
4
|
+
r = RegexPatterns()
|
5
|
+
|
6
|
+
|
7
|
+
def capture_common_data(hand: list[str]) -> dict[str, Any]:
    """
    Collect the tournament-level values from a hand.

    Args:
        hand (list): List of texts from a specific hand.

    Returns:
        dict: The values returned by the regex getters, keyed by "Modality",
        "TableSize", "BuyIn", "TournID" and "Owner".
    """
    # Getters are invoked in the same order as the keys appear.
    return {
        "Modality": r.get_modality(hand),
        "TableSize": r.get_table_size(hand),
        "BuyIn": r.get_buyin(hand),
        "TournID": r.get_tourn_id(hand),
        "Owner": r.get_owner(hand),
    }
|
25
|
+
|
26
|
+
|
27
|
+
def capture_general_data_of_the_hand(splitted_hand: list[str]) -> dict[str, Any]:
    """
    Collect the values that are shared by every player in a hand.

    Args:
        splitted_hand (list): List of texts from a specific hand.

    Returns:
        dict: The values returned by the regex getters, keyed by column name
        ("HandID", "TableID", "LocalTime", "Level", "Ante", "Blinds",
        "OwnersHand", "Playing", "BoardFlop", "BoardTurn", "BoardRiver").
    """
    # Getters are invoked in the same order as the keys appear.
    return {
        "HandID": r.get_hand_id(splitted_hand),
        "TableID": r.get_table_id(splitted_hand),
        "LocalTime": r.get_time(splitted_hand),
        "Level": r.get_level(splitted_hand),
        "Ante": r.get_ante(splitted_hand),
        "Blinds": r.get_blinds(splitted_hand),
        "OwnersHand": r.get_owner_cards(splitted_hand),
        "Playing": r.get_number_of_active_players(splitted_hand),
        # The board is looked up once per stage marker
        "BoardFlop": r.get_board(splitted_hand, stage="FLOP ***"),
        "BoardTurn": r.get_board(splitted_hand, stage="TURN ***"),
        "BoardRiver": r.get_board(splitted_hand, stage="RIVER ***"),
    }
|
51
|
+
|
52
|
+
|
53
|
+
def capture_specific_data_of_the_player(
    splitted_hand: list[str], player: str
) -> dict[str, Any]:
    """
    Collect the values that belong to a single player within a hand.

    Args:
        splitted_hand (list): List of texts from a specific hand.
        player (str): Name of the player.

    Returns:
        dict: The values returned by the regex getters, keyed by column name.
        "Player" holds the name wrapped in a one-element list.
    """
    # Getters are invoked in the same order as the keys appear.
    return {
        "Player": [player],
        "Seat": r.get_seat(player, splitted_hand),
        "PostedAnte": r.get_posted_ante(player, splitted_hand),
        "Position": r.get_position(player, splitted_hand),
        "PostedBlind": r.get_posted_blind(player, splitted_hand),
        "Stack": r.get_stack(player, splitted_hand),
        # Actions, one lookup per stage marker
        "PreflopAction": r.get_actions(player, splitted_hand, stage="HOLE CARDS ***"),
        "FlopAction": r.get_actions(player, splitted_hand, stage="FLOP ***"),
        "TurnAction": r.get_actions(player, splitted_hand, stage="TURN ***"),
        "RiverAction": r.get_actions(player, splitted_hand, stage="RIVER ***"),
        # All-in flags; the ante uses a different marker than the streets
        "AnteAllIn": r.get_allin(player, splitted_hand, stage=" posts the ante "),
        "PreflopAllIn": r.get_allin(player, splitted_hand, stage="HOLE CARDS ***"),
        "FlopAllIn": r.get_allin(player, splitted_hand, stage="FLOP ***"),
        "TurnAllIn": r.get_allin(player, splitted_hand, stage="TURN ***"),
        "RiverAllIn": r.get_allin(player, splitted_hand, stage="RIVER ***"),
        # Outcome of the hand for this player
        "ShowDown": r.get_showed_card(player, splitted_hand),
        "CardCombination": r.get_card_combination(player, splitted_hand),
        "Result": r.get_result(player, splitted_hand),
        "Balance": r.get_balance(player, splitted_hand),
        "FinalRank": r.get_final_rank(player, splitted_hand),
        "Prize": r.get_prize(player, splitted_hand),
    }
|