blobchat 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- blobchat-0.1.0/PKG-INFO +137 -0
- blobchat-0.1.0/README.md +128 -0
- blobchat-0.1.0/pyproject.toml +21 -0
- blobchat-0.1.0/setup.cfg +4 -0
- blobchat-0.1.0/src/blobchat/__init__.py +7 -0
- blobchat-0.1.0/src/blobchat/__main__.py +4 -0
- blobchat-0.1.0/src/blobchat/cleaner.py +25 -0
- blobchat-0.1.0/src/blobchat/cli.py +21 -0
- blobchat-0.1.0/src/blobchat/py.typed +1 -0
- blobchat-0.1.0/src/blobchat/slang_dict.py +3039 -0
- blobchat-0.1.0/src/blobchat/utils.py +2 -0
- blobchat-0.1.0/src/blobchat.egg-info/PKG-INFO +137 -0
- blobchat-0.1.0/src/blobchat.egg-info/SOURCES.txt +14 -0
- blobchat-0.1.0/src/blobchat.egg-info/dependency_links.txt +1 -0
- blobchat-0.1.0/src/blobchat.egg-info/top_level.txt +1 -0
- blobchat-0.1.0/tests/test_blobchat.py +14 -0
blobchat-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: blobchat
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Chat text cleaning library (slang → meaningful text)
|
|
5
|
+
Author-email: Prince Kushwaha <p4prince2@email.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Requires-Python: >=3.7
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# BlobChat
|
|
13
|
+
|
|
14
|
+
A lightweight Python library for transforming raw chat text into structured, meaningful data.
|
|
15
|
+
|
|
16
|
+
BlobChat helps you clean, analyze, and convert chat conversations (like WhatsApp, Messenger, etc.) into useful insights — similar to how pandas works with tabular data.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## 🚀 Features
|
|
21
|
+
|
|
22
|
+
- 🧹 Clean and normalize chat text
|
|
23
|
+
|
|
24
|
+
- 🔥 Remove emojis and unwanted symbols
|
|
25
|
+
|
|
26
|
+
- 📊 Word count and basic text analysis
|
|
27
|
+
|
|
28
|
+
- 🔧 Simple, chainable API (like pandas)
|
|
29
|
+
|
|
30
|
+
- ⚡ Lightweight and easy to use
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
---
|
|
34
|
+
|
|
35
|
+
## 📦 Installation
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install blobchat
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
> ⚠️ Currently under development. For the latest version:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
git clone https://github.com/p4prince2/blobchat.git
|
|
45
|
+
cd blobchat
|
|
46
|
+
pip install -e .
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## 🧪 Quick Example
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from blobchat import Chat
|
|
55
|
+
|
|
56
|
+
chat = Chat("Hello BRO!!! 😎🔥 How are you?")
|
|
57
|
+
chat.clean().remove_symbols()
|
|
58
|
+
|
|
59
|
+
print(chat.text)
|
|
60
|
+
print(chat.word_count())
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Output:
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
hello bro how are you
|
|
67
|
+
5
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
## 🚧 Planned Features
|
|
73
|
+
|
|
74
|
+
- 📂 Read chat files (WhatsApp, etc.)
|
|
75
|
+
|
|
76
|
+
- 👥 Detect users and messages
|
|
77
|
+
|
|
78
|
+
- 📅 Extract timestamps
|
|
79
|
+
|
|
80
|
+
- 📊 Convert chats to DataFrame
|
|
81
|
+
|
|
82
|
+
- 📈 Analytics (top users, word frequency, etc.)
|
|
83
|
+
|
|
84
|
+
- 😊 Sentiment analysis
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## 🏗️ Project Structure
|
|
90
|
+
|
|
91
|
+
```
|
|
92
|
+
blobchat/
|
|
93
|
+
├── src/blobchat/
|
|
94
|
+
│   ├── __init__.py
|
|
95
|
+
│   └── core.py
|
|
96
|
+
├── tests/
|
|
97
|
+
├── README.md
|
|
98
|
+
└── pyproject.toml
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
---
|
|
102
|
+
|
|
103
|
+
## 🧑‍💻 Development
|
|
104
|
+
|
|
105
|
+
Clone the repo and install in editable mode:
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
git clone https://github.com/p4prince2/blobchat.git
|
|
109
|
+
cd blobchat
|
|
110
|
+
pip install -e .
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Run tests:
|
|
114
|
+
|
|
115
|
+
```bash
|
|
116
|
+
pytest
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## 🤝 Contributing
|
|
122
|
+
|
|
123
|
+
Contributions are welcome!
|
|
124
|
+
Feel free to open issues or submit pull requests.
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## 📄 License
|
|
129
|
+
|
|
130
|
+
MIT License
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## 👨‍💻 Author
|
|
135
|
+
|
|
136
|
+
**Prince Kushwaha**
|
|
137
|
+
GitHub: [https://github.com/p4prince2](https://github.com/p4prince2)
|
blobchat-0.1.0/README.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
# BlobChat
|
|
4
|
+
|
|
5
|
+
A lightweight Python library for transforming raw chat text into structured, meaningful data.
|
|
6
|
+
|
|
7
|
+
BlobChat helps you clean, analyze, and convert chat conversations (like WhatsApp, Messenger, etc.) into useful insights — similar to how pandas works with tabular data.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## 🚀 Features
|
|
12
|
+
|
|
13
|
+
- 🧹 Clean and normalize chat text
|
|
14
|
+
|
|
15
|
+
- 🔥 Remove emojis and unwanted symbols
|
|
16
|
+
|
|
17
|
+
- 📊 Word count and basic text analysis
|
|
18
|
+
|
|
19
|
+
- 🔧 Simple, chainable API (like pandas)
|
|
20
|
+
|
|
21
|
+
- ⚡ Lightweight and easy to use
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## 📦 Installation
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
pip install blobchat
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
> ⚠️ Currently under development. For the latest version:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
git clone https://github.com/p4prince2/blobchat.git
|
|
36
|
+
cd blobchat
|
|
37
|
+
pip install -e .
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## 🧪 Quick Example
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from blobchat import Chat
|
|
46
|
+
|
|
47
|
+
chat = Chat("Hello BRO!!! 😎🔥 How are you?")
|
|
48
|
+
chat.clean().remove_symbols()
|
|
49
|
+
|
|
50
|
+
print(chat.text)
|
|
51
|
+
print(chat.word_count())
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### Output:
|
|
55
|
+
|
|
56
|
+
```
|
|
57
|
+
hello bro how are you
|
|
58
|
+
5
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
---
|
|
62
|
+
|
|
63
|
+
## 🚧 Planned Features
|
|
64
|
+
|
|
65
|
+
- 📂 Read chat files (WhatsApp, etc.)
|
|
66
|
+
|
|
67
|
+
- 👥 Detect users and messages
|
|
68
|
+
|
|
69
|
+
- 📅 Extract timestamps
|
|
70
|
+
|
|
71
|
+
- 📊 Convert chats to DataFrame
|
|
72
|
+
|
|
73
|
+
- 📈 Analytics (top users, word frequency, etc.)
|
|
74
|
+
|
|
75
|
+
- 😊 Sentiment analysis
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## 🏗️ Project Structure
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
blobchat/
|
|
84
|
+
├── src/blobchat/
|
|
85
|
+
│   ├── __init__.py
|
|
86
|
+
│   └── core.py
|
|
87
|
+
├── tests/
|
|
88
|
+
├── README.md
|
|
89
|
+
└── pyproject.toml
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
---
|
|
93
|
+
|
|
94
|
+
## 🧑‍💻 Development
|
|
95
|
+
|
|
96
|
+
Clone the repo and install in editable mode:
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
git clone https://github.com/p4prince2/blobchat.git
|
|
100
|
+
cd blobchat
|
|
101
|
+
pip install -e .
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
Run tests:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pytest
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## 🤝 Contributing
|
|
113
|
+
|
|
114
|
+
Contributions are welcome!
|
|
115
|
+
Feel free to open issues or submit pull requests.
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## 📄 License
|
|
120
|
+
|
|
121
|
+
MIT License
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## 👨‍💻 Author
|
|
126
|
+
|
|
127
|
+
**Prince Kushwaha**
|
|
128
|
+
GitHub: [https://github.com/p4prince2](https://github.com/p4prince2)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "blobchat"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Chat text cleaning library (slang → meaningful text)"
|
|
5
|
+
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Prince Kushwaha", email = "p4prince2@email.com" }
|
|
8
|
+
]
|
|
9
|
+
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
requires-python = ">=3.7"
|
|
12
|
+
|
|
13
|
+
license = { text = "MIT" }
|
|
14
|
+
|
|
15
|
+
# Runtime requirements: blobchat/cli.py imports both packages at module level,
# so the console entry point fails with ImportError when they are not installed.
dependencies = [
    "typer",
    "rich",
]
|
|
16
|
+
|
|
17
|
+
[tool.setuptools]
|
|
18
|
+
license-files = []  # 🔥 IMPORTANT FIX
|
|
19
|
+
|
|
20
|
+
[tool.setuptools.packages.find]
|
|
21
|
+
where = ["src"]
|
blobchat-0.1.0/setup.cfg
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from .slang_dict import CHAT_DICT
|
|
3
|
+
|
|
4
|
+
class ChatCleaner:
    """Normalize informal chat text.

    The cleaning pipeline expands slang tokens through a lookup table,
    shortens runs of a repeated character, strips markup-like tags and
    punctuation, and finally lower-cases the result.

    Args:
        slang: Optional mapping from lower-case slang tokens to their
            replacement text. When omitted, the package-wide ``CHAT_DICT``
            imported from ``slang_dict`` is used.
    """

    # Splits a whitespace-free token into
    # (leading non-word chars, core, trailing non-word chars).
    _TOKEN_PARTS = re.compile(r"^(\W*)(.*?)(\W*)$", re.DOTALL)
    # Any character repeated three or more times in a row.
    _REPEATED_CHAR = re.compile(r"(.)\1{2,}")
    # Shortest "<...>" span, e.g. an HTML-like tag.
    _MARKUP_TAG = re.compile(r"<.*?>")
    # Anything that is neither a word character nor whitespace.
    _NON_WORD = re.compile(r"[^\w\s]")

    def __init__(self, slang=None):
        self.slang = CHAT_DICT if slang is None else slang

    def _expand_token(self, token):
        """Return the slang expansion of one token, or the token unchanged."""
        # An exact (case-insensitive) hit wins, so table entries that
        # themselves contain punctuation keep working.
        exact = self.slang.get(token.lower())
        if exact is not None:
            return exact

        # Otherwise retry without the surrounding punctuation, so that
        # "brb!" or "(u)" are expanded while their punctuation is kept.
        lead, core, trail = self._TOKEN_PARTS.match(token).groups()
        if core and (lead or trail):
            expansion = self.slang.get(core.lower())
            if expansion is not None:
                return lead + expansion + trail
        return token

    def expand_slang(self, text):
        """Replace known slang tokens in *text* with their expansions.

        The text is split on whitespace and re-joined with single spaces,
        so runs of whitespace collapse. Lookup is case-insensitive; tokens
        without a table entry are returned as written.
        """
        return " ".join(self._expand_token(token) for token in text.split())

    def remove_repeated_chars(self, text):
        """Shorten any run of three or more identical characters to two."""
        return self._REPEATED_CHAR.sub(r"\1\1", text)

    def remove_noise(self, text):
        """Drop ``<...>`` spans, then every non-word, non-space character."""
        without_tags = self._MARKUP_TAG.sub("", text)
        return self._NON_WORD.sub("", without_tags)

    def clean(self, text):
        """Run the full pipeline and return lower-cased, single-spaced text."""
        text = self.expand_slang(text)
        text = self.remove_repeated_chars(text)
        text = self.remove_noise(text)
        # Removing symbol-only tokens leaves doubled or edge spaces behind;
        # re-normalize so the output is consistently single-spaced.
        return " ".join(text.lower().split())
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Console script for blobchat."""
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
|
|
6
|
+
from blobchat import utils
|
|
7
|
+
|
|
8
|
+
app = typer.Typer()
|
|
9
|
+
console = Console()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@app.command()
def main() -> None:
    """Console script for blobchat."""
    # Placeholder messages, printed in order through the module-level
    # rich console.
    placeholder_lines = (
        "Replace this message by putting your code into blobchat.cli.main",
        "See Typer documentation at https://typer.tiangolo.com/",
    )
    for line in placeholder_lines:
        console.print(line)
    # Call the package's utility function once the messages are shown.
    utils.do_something_useful()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
if __name__ == "__main__":
|
|
21
|
+
app()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Marker file for PEP 561
|